From 413e190916303bbb1ddf8556ef15d27e0f8a0354 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 10 Jan 2024 15:10:39 +0000
Subject: [PATCH 001/439] enable optimize_functions_to_subcolumns by default

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 0e6da579b10..5200e9f775a 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -580,7 +580,7 @@ class IColumn;
     M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
     M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
     M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \
-    M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
+    M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
     M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
     M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
     M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

From c1fc12fd35ab3a41edceb68ec3679bed11a69577 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 10 Jan 2024 22:51:06 +0000
Subject: [PATCH 002/439] some fixes for optimize_functions_to_subcolumns

---
 .../Passes/FunctionToSubcolumnsPass.cpp       | 223 +++++++++++++++---
 .../RewriteFunctionToSubcolumnVisitor.cpp     |   2 +-
 .../RewriteFunctionToSubcolumnVisitor.h       |   1 +
 src/Interpreters/TreeOptimizer.cpp            |  41 +++-
 4 files changed, 231 insertions(+), 36 deletions(-)

diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
index cd635f87e0e..932e715a935 100644
--- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
+++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp
@@ -23,48 +23,194 @@ namespace DB
 namespace
 {
 
-class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
+std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
+{
+    auto * function_node = node->as<FunctionNode>();
+    if (!function_node)
+        return {};
+
+    auto & function_arguments_nodes = function_node->getArguments().getNodes();
+    if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
+        return {};
+    auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
+    if (!first_argument_column_node)
+        return {};
+
+    auto column_source = first_argument_column_node->getColumnSource();
+    auto * table_node = column_source->as<TableNode>();
+    if (!table_node)
+        return {};
+
+    if (!table_node->getStorageSnapshot())
+        return {};
+
+    if (!table_node->getStorage()->supportsSubcolumns())
+        return {};
+
+    return std::make_tuple(function_node, first_argument_column_node, table_node);
+}
+
+class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
 {
 public:
-    using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
+    using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
     using Base::Base;
 
-    void enterImpl(QueryTreeNodePtr & node) const
+    struct Data
     {
-        if
(!getSettings().optimize_functions_to_subcolumns) + std::unordered_set all_key_columns; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + Data getData() const { return data; } + + void enterImpl(const QueryTreeNodePtr & node) + { + if (auto * column_node = node->as()) + { + enterImpl(*column_node); return; + } - auto * function_node = node->as(); - if (!function_node) + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); + if (function_node && first_argument_node && table_node) + { + enterImpl(*function_node, *first_argument_node, *table_node); return; + } + } - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - size_t function_arguments_nodes_size = function_arguments_nodes.size(); - - if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) - return; - - auto * first_argument_column_node = function_arguments_nodes.front()->as(); - - if (!first_argument_column_node) - return; - - auto column_source = first_argument_column_node->getColumnSource(); +private: + void enterImpl(const ColumnNode & column_node) + { + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) return; - const auto & storage = table_node->getStorage(); - if (!storage->supportsSubcolumns()) - return; + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column_node.getColumnName()}); - auto column = first_argument_column_node->getColumn(); + ++data.indentifiers_count[qualified_name]; + + if (processed_tables.emplace(table_name).second) + { + const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + data.all_key_columns.insert(identifier); + } + }; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
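+            ///
+            /// Hypothetical example (illustration only): for a table defined as
+            ///     CREATE TABLE t (arr Array(UInt64), INDEX idx length(arr) TYPE minmax GRANULARITY 1)
+            ///     ENGINE = MergeTree ORDER BY tuple()
+            /// rewriting a predicate `length(arr) = 3` into `arr.size0 = 3` would no longer match
+            /// the skipping index expression, so key and index columns are collected and excluded here.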
+ + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + add_key_columns(primary_key_columns); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + } + + void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) + { + const auto & function_arguments_nodes = function_node.getArguments().getNodes(); + const auto & function_name = function_node.getFunctionName(); + + auto column = first_argument_column_node.getColumn(); WhichDataType column_type(column.type); + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (function_arguments_nodes.size() == 1) + { + if (column_type.isArray()) + { + if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isNullable()) + { + if (function_name == "isNull" || function_name == "isNotNull") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isMap()) + { + if (function_name == "mapKeys" || function_name == "mapValues") + ++data.optimized_identifiers_count[qualified_name]; + } + } + else if (function_arguments_nodes.size() == 2) + { + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) + { + /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` + * with `tuple_argument.column_name`. 
+ */ + const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); + const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + + if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + ++data.optimized_identifiers_count[qualified_name]; + } + else if (function_name == "mapContains" && column_type.isMap()) + { + ++data.optimized_identifiers_count[qualified_name]; + } + } + } + + Data data; + NameSet processed_tables; +}; + + +class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set identifiers_to_optimize_) + : Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + + void enterImpl(QueryTreeNodePtr & node) const + { + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); + if (!function_node || !first_argument_column_node || !table_node) + return; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); const auto & function_name = function_node->getFunctionName(); - if (function_arguments_nodes_size == 1) + auto column = first_argument_column_node->getColumn(); + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (!identifiers_to_optimize.contains(qualified_name)) + return; + + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) { @@ -72,7 +218,6 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; - node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -106,7 +251,6 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; - node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") @@ -140,10 +284,9 @@ public: } } } - else + else if (function_arguments_nodes.size() == 2) { const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` @@ -193,6 +336,8 @@ public: } private: + std::unordered_set identifiers_to_optimize; + inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); @@ -204,8 +349,26 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - FunctionToSubcolumnsVisitor visitor(context); - visitor.visit(query_tree_node); + if (!context->getSettingsRef().optimize_functions_to_subcolumns) + return; + + std::unordered_set identifiers_to_optimize; + + { + FunctionToSubcolumnsVisitorFirstPass visitor(context); + visitor.visit(query_tree_node); + + auto data = visitor.getData(); + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + 
identifiers_to_optimize.insert(identifier); + } + + if (!identifiers_to_optimize.empty()) + { + FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); + visitor.visit(query_tree_node); + } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 506fa13b7ba..5747ce5a3a1 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -78,7 +78,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage)) + if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) return; const auto & column_type = columns.get(name_in_storage).type; diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 4d064bdee10..3c945da92ec 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -17,6 +17,7 @@ public: void visit(ASTFunction & function, ASTPtr & ast) const; StorageMetadataPtr metadata_snapshot; + IdentifierNameSet forbidden_identifiers; }; using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..24599ed0044 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -656,10 +656,41 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot) +void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot}; - RewriteFunctionToSubcolumnVisitor(data).visit(query); + if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + return; + + const auto & metadata_snapshot = result.storage_snapshot->metadata; + // const auto & select_query = assert_cast(*query); + + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + // if (select_query.final()) + // return; + + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + + IdentifierNameSet forbidden_identifiers; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
+ const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); + forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + } + + RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; + RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) @@ -726,8 +757,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) - optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); + if (settings.optimize_functions_to_subcolumns) + optimizeFunctionsToSubcolumns(query, result); /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) From f79202bd532594cd441be928a11691a51fe88e65 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 16:12:41 +0000 Subject: [PATCH 003/439] some fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 57 ++++++++++++------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 932e715a935..d901692ba27 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -32,19 +33,14 @@ std::tuple getTypedNodesForOptimizati auto & function_arguments_nodes = function_node->getArguments().getNodes(); if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) return {}; + auto * first_argument_column_node = function_arguments_nodes.front()->as(); if (!first_argument_column_node) return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) - return {}; - - if (!table_node->getStorageSnapshot()) - return {}; - - if (!table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -58,6 +54,7 @@ public: struct Data { + bool has_final = false; std::unordered_set all_key_columns; std::unordered_map indentifiers_count; std::unordered_map optimized_identifiers_count; @@ -67,12 +64,31 @@ public: void enterImpl(const QueryTreeNodePtr & node) { + if (data.has_final) + return; + if (auto * column_node = node->as()) { enterImpl(*column_node); return; } + if (auto * table_node = node->as()) + { + if (table_node->hasTableExpressionModifiers() + && table_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + + if (auto * table_function_node = node->as()) + { + if (table_function_node->hasTableExpressionModifiers() + && 
table_function_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); if (function_node && first_argument_node && table_node) { @@ -159,9 +175,6 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. - */ const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); @@ -352,22 +365,22 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr if (!context->getSettingsRef().optimize_functions_to_subcolumns) return; + FunctionToSubcolumnsVisitorFirstPass first_visitor(context); + first_visitor.visit(query_tree_node); + auto data = first_visitor.getData(); + + if (data.has_final) + return; + std::unordered_set identifiers_to_optimize; - - { - FunctionToSubcolumnsVisitorFirstPass visitor(context); - visitor.visit(query_tree_node); - - auto data = visitor.getData(); - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - } + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); if (!identifiers_to_optimize.empty()) { - FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); - visitor.visit(query_tree_node); + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } From cb4c78af59abca8788dfbd880fa6b7042fca3e6c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 23:13:04 +0000 Subject: [PATCH 004/439] fix optimize_functions_to_subcolumns with old analyzer --- .../Passes/FunctionToSubcolumnsPass.cpp | 131 ++++++++++-------- src/DataTypes/DataTypeTuple.cpp | 4 +- .../RewriteFunctionToSubcolumnVisitor.cpp | 126 +++++++++++++---- .../RewriteFunctionToSubcolumnVisitor.h | 34 ++++- src/Interpreters/TreeOptimizer.cpp | 50 +++++-- .../02286_tuple_numeric_identifier.sql | 6 +- 6 files changed, 238 insertions(+), 113 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index d901692ba27..82d50f5fdb1 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -40,7 +40,7 @@ std::tuple getTypedNodesForOptimizati auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -67,28 +67,18 @@ public: if (data.has_final) return; + if (auto * table_node = node->as()) + { + enterImpl(*table_node); + return; + } + if (auto * column_node = node->as()) { 
enterImpl(*column_node); return; } - if (auto * table_node = node->as()) - { - if (table_node->hasTableExpressionModifiers() - && table_node->getTableExpressionModifiers()->hasFinal()) - data.has_final = true; - return; - } - - if (auto * table_function_node = node->as()) - { - if (table_function_node->hasTableExpressionModifiers() - && table_function_node->getTableExpressionModifiers()->hasFinal()) - data.has_final = true; - return; - } - auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); if (function_node && first_argument_node && table_node) { @@ -98,6 +88,45 @@ public: } private: + Data data; + NameSet processed_tables; + + void enterImpl(const TableNode & table_node) + { + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + { + data.has_final = true; + return; + } + + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + if (processed_tables.emplace(table_name).second) + return; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + data.all_key_columns.insert(identifier); + } + }; + + const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; + + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + add_key_columns(primary_key_columns); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + void enterImpl(const ColumnNode & column_node) { auto column_source = column_node.getColumnSource(); @@ -109,36 +138,6 @@ private: Identifier qualified_name({table_name, column_node.getColumnName()}); ++data.indentifiers_count[qualified_name]; - - if (processed_tables.emplace(table_name).second) - { - const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata; - - auto add_key_columns = [&](const auto & key_columns) - { - for (const auto & column_name : key_columns) - { - Identifier identifier({table_name, column_name}); - data.all_key_columns.insert(identifier); - } - }; - - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - add_key_columns(partition_key_columns); - - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - const auto & index_columns = index.expression->getRequiredColumns(); - add_key_columns(index_columns); - } - } } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) @@ -187,9 +186,6 @@ private: } } } - - Data data; - NameSet processed_tables; }; @@ -214,15 +210,15 @@ public: const auto & function_name = function_node->getFunctionName(); auto column = first_argument_column_node->getColumn(); - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); - Identifier qualified_name({table_name, column.name}); + Identifier qualified_name({table_name, column.name}); if (!identifiers_to_optimize.contains(qualified_name)) return; + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) @@ -369,19 +365,36 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. if (data.has_final) return; + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
+ std::unordered_set identifiers_to_optimize; for (const auto & [identifier, count] : data.optimized_identifiers_count) if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) identifiers_to_optimize.insert(identifier); - if (!identifiers_to_optimize.empty()) - { - FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); - second_visitor.visit(query_tree_node); - } + if (identifiers_to_optimize.empty()) + return; + + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 9cce59b0dca..6edbb8b27eb 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; - extern const int ILLEGAL_INDEX; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -270,7 +270,7 @@ std::optional DataTypeTuple::tryGetPositionByName(const String & name) c String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out range ([1, {}])", i, names.size()); return names[i - 1]; } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 5747ce5a3a1..2a235ae31e4 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -63,42 +63,104 @@ const std::unordered_map getColumnFromArgumentsToOptimize( + const ASTs & arguments, + const StorageMetadataPtr & metadata_snapshot) { - const auto & arguments = function.arguments->children; if (arguments.empty() || arguments.size() > 2) - return; + return {}; const auto * identifier = arguments[0]->as(); if (!identifier) - return; + return {}; const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) - return; + if (!columns.has(name_in_storage)) + return {}; const auto & column_type = columns.get(name_in_storage).type; - TypeIndex column_type_id = column_type->getTypeId(); + return NameAndTypePair{name_in_storage, column_type}; +} + +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data) +{ + if (const auto * identifier = ast->as()) + { + ++data.indentifiers_count[identifier->name()]; + return; + } + + if (const auto * function = ast->as()) + { + visit(*function, data); + return; + } +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data) +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); + + if (arguments.size() == 1) + { + auto it = unary_function_to_subcolumn.find(function.name); + if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + 
++data.optimized_identifiers_count[column->name]; + } + else + { + if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) + { + const auto * literal = arguments[1]->as(); + if (!literal) + return; + + auto value_type = literal->value.getType(); + if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } + else + { + auto it = binary_function_to_subcolumn.find(function.name); + if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + ++data.optimized_identifiers_count[column->name]; + } + } +} + +void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); const auto & alias = function.tryGetAlias(); if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - } + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name); + ast->setAlias(alias); } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -110,30 +172,34 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) auto value_type = literal->value.getType(); if (value_type == Field::Types::UInt64) { - const auto & type_tuple = assert_cast(*column_type); + const auto & type_tuple = assert_cast(*column->type); auto index = literal->value.get(); subcolumn_name = type_tuple.getNameByPosition(index); } else if (value_type == Field::Types::String) + { subcolumn_name = literal->value.get(); + } else + { return; + } - ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } else { auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name, arguments[1]); - ast->setAlias(alias); - } - } + if (it == binary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name, arguments[1]); + ast->setAlias(alias); } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 3c945da92ec..08eb6e27c52 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -7,20 +7,46 @@ namespace DB { class ASTFunction; +class ASTIdentifier; + +/// Collects info about identifiers to select columns to optimize to subcolumns. 
+class RewriteFunctionToSubcolumnFirstPassMatcher +{ +public: + struct Data + { + explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {} + + StorageMetadataPtr metadata_snapshot; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + static void visit(const ASTPtr & ast, Data & data); + static void visit(const ASTFunction & function, Data & data); + static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; } +}; + +using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor; /// Rewrites functions to subcolumns, if possible, to reduce amount of read data. /// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnData +class RewriteFunctionToSubcolumnSecondPassData { public: using TypeToVisit = ASTFunction; void visit(ASTFunction & function, ASTPtr & ast) const; + RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_) + : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + StorageMetadataPtr metadata_snapshot; - IdentifierNameSet forbidden_identifiers; + NameSet identifiers_to_optimize; }; -using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor; +using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher; +using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 3df6242fa85..07cfe897010 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -659,35 +659,55 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & re return; const auto & metadata_snapshot = result.storage_snapshot->metadata; - // const auto & select_query = assert_cast(*query); + const auto & select_query = assert_cast(*query); /// For queries with FINAL converting function to subcolumn may alter /// special merging algorithms and produce wrong result of query. - // if (select_query.final()) - // return; + if (select_query.final()) + return; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + NameSet all_key_columns; - IdentifierNameSet forbidden_identifiers; - - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); - forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end()); const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end()); for (const auto & index : metadata_snapshot->getSecondaryIndices()) { const auto & index_columns = index.expression->getRequiredColumns(); - forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + all_key_columns.insert(index_columns.begin(), index_columns.end()); } - RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; - RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); + RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); + + NameSet identifiers_to_optimize; + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); + + if (identifiers_to_optimize.empty()) + return; + + RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); + RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index f723284ad61..151ff275f7b 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,9 +12,9 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER, ARGUMENT_OUT_OF_BOUND} DROP TABLE t_tuple_numeric; From 93c362a803ff924f4176f6cd8483c08d731ccb59 Mon Sep 17 
00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 12 Jan 2024 12:06:38 +0000 Subject: [PATCH 005/439] return and fix test --- .../01600_parts_states_metrics_long.reference | 4 ++ .../01600_parts_states_metrics_long.sh | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/queries/0_stateless/01600_parts_states_metrics_long.reference create mode 100755 tests/queries/0_stateless/01600_parts_states_metrics_long.sh diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.reference b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh new file mode 100755 index 00000000000..2e47034e528 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# NOTE: database = $CLICKHOUSE_DATABASE is unwanted +verify_sql="SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" + +# The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. +# So, there is inherent race condition. But it should get expected result eventually. +# In case of test failure, this code will do infinite loop and timeout. 
+verify() +{ + while true + do + result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) + [ "$result" = "1" ] && break + sleep 0.1 + done + echo 1 +} + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +verify + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +verify + +$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +verify + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +verify From 8a0126204272fdcecd722595a2d7e64496ba7c94 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 15 Jan 2024 18:17:36 +0000 Subject: [PATCH 006/439] fix tests --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 9 ++++++--- tests/queries/0_stateless/02116_tuple_element.sql | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 82d50f5fdb1..c5d34b5462a 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -64,6 +64,9 @@ public: void enterImpl(const QueryTreeNodePtr & node) { + if (!getSettings().optimize_functions_to_subcolumns) + return; + if (data.has_final) return; @@ -202,6 +205,9 @@ public: void enterImpl(QueryTreeNodePtr & node) const { + if (!getSettings().optimize_functions_to_subcolumns) + return; + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); if (!function_node || !first_argument_column_node || !table_node) return; @@ -358,9 +364,6 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - if (!context->getSettingsRef().optimize_functions_to_subcolumns) - return; - FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..ece7114e763 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -17,8 +17,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; @@ -29,8 +29,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM 
t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; From e6ad9dd387cc004096eee3c2bfbadf6689473203 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Jan 2024 17:21:15 +0000 Subject: [PATCH 007/439] fix crash with optimize_functions_to_subcolumns --- src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 2a235ae31e4..eaf27a7ae80 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -81,6 +81,9 @@ std::optional getColumnFromArgumentsToOptimize( return {}; const auto & column_type = columns.get(name_in_storage).type; + if (column_type->hasDynamicSubcolumns()) + return {}; + return NameAndTypePair{name_in_storage, column_type}; } From 0c9926a7045d6200ad2e486f3ee3532c1cbbca16 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Jan 2024 15:42:31 +0000 Subject: [PATCH 008/439] fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 28 +++++++++++++++---- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Storages/HDFS/StorageHDFS.h | 1 + src/Storages/HDFS/StorageHDFSCluster.h | 2 -- src/Storages/IStorage.h | 2 ++ src/Storages/IStorageCluster.h | 4 ++- src/Storages/S3Queue/StorageS3Queue.h | 1 + src/Storages/StorageAzureBlob.h | 2 ++ src/Storages/StorageAzureBlobCluster.h | 2 -- src/Storages/StorageFile.h | 1 + src/Storages/StorageFileCluster.h | 2 -- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Cluster.h | 2 -- src/Storages/StorageURL.h | 1 + src/Storages/StorageURLCluster.h | 2 -- 15 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 7b5f3a433ad..9aa785d5918 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -35,17 +35,29 @@ std::tuple getTypedNodesForOptimizati return {}; auto * first_argument_column_node = function_arguments_nodes.front()->as(); - if (!first_argument_column_node) + if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set") return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node) + return {}; + + const auto & storage = table_node->getStorage(); + const auto & storage_snapshot = table_node->getStorageSnapshot(); + auto column = first_argument_column_node->getColumn(); + + if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) + return {}; + + 
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + if (!column_in_table || !column_in_table->type->equals(*column.type)) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); } +/// First pass collects info about identifiers to determine which identifiers are allowed to optimize. class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: @@ -132,6 +144,9 @@ private: void enterImpl(const ColumnNode & column_node) { + if (column_node.getColumnName() == "__grouping_set") + return; + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); if (!table_node) @@ -191,7 +206,7 @@ private: } }; - +/// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { public: @@ -222,9 +237,6 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - if (first_argument_column_node->getColumnName() == "__grouping_set") - return; - auto column_source = first_argument_column_node->getColumnSource(); WhichDataType column_type(column.type); @@ -236,6 +248,8 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -269,6 +283,8 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 07cfe897010..8fab032aece 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -655,7 +655,7 @@ void transformIfStringsIntoEnum(ASTPtr & query) void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot) return; const auto & metadata_snapshot = result.storage_snapshot->metadata; diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e0..bd36556c017 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -81,6 +81,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 7c4c41a573a..f35a912129c 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,8 +37,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..62faedd19ba 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -167,6 +167,8 @@ public: /// Returns 
true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if storage supports optimizations of functions by reading subcolumns. + virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); } /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. /// Storage may throw an exception later if some query kind is not fully supported. diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103d..0e466976852 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -38,7 +38,9 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override { return true; } + bool isRemote() const override final { return true; } + bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } protected: virtual void updateBeforeRead(const ContextPtr &) {} diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 3d3594dc2ab..0b50913546e 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -86,6 +86,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb6..4d54f1cdcc3 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -101,6 +101,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsTrivialCountOptimization() const override { return true; } diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 2831b94f825..c95e329803c 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6..db7d8be15cf 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -93,6 +93,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index a6e57c3bb4f..cb00e8870e8 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -33,8 +33,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) 
const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb..a027f96aa0a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -386,6 +386,7 @@ private: ContextPtr ctx); bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsSubsetOfColumns(const ContextPtr & context) const; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index c526f14834a..81169f79746 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } protected: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 07d4d0cad38..f16f2757611 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -281,6 +281,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 07978040029..a555df3cd43 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: From a89956bb0f614d81d3db7998ea026a0a01db8cd4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 22 Jan 2024 19:33:34 +0000 Subject: [PATCH 009/439] more cases for optimize_functions_to_subcolumns --- ...egateFunctionsArithmericOperationsPass.cpp | 27 +++----- .../Passes/ComparisonTupleEliminationPass.cpp | 13 ++-- src/Analyzer/Passes/CountDistinctPass.cpp | 7 +-- .../Passes/FunctionToSubcolumnsPass.cpp | 58 +++++++++++------ .../Passes/NormalizeCountVariantsPass.cpp | 13 +--- ...ateOrDateTimeConverterWithPreimagePass.cpp | 25 +++----- .../RewriteAggregateFunctionWithIfPass.cpp | 25 +++----- .../RewriteSumFunctionWithSumAndCountPass.cpp | 31 ++-------- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 16 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 11 +--- src/Analyzer/Passes/UniqToCountPass.cpp | 6 +- src/Analyzer/Utils.cpp | 22 +++++++ src/Analyzer/Utils.h | 8 +++ src/Interpreters/InterpreterExplainQuery.cpp | 26 +++++--- .../RewriteFunctionToSubcolumnVisitor.cpp | 62 +++++++------------ .../01872_functions_to_subcolumns.reference | 18 +++--- .../01872_functions_to_subcolumns.sql | 1 - ...functions_to_subcolumns_analyzer.reference | 50 +++++++++++++++ ...01872_functions_to_subcolumns_analyzer.sql | 42 +++++++++++++ .../0_stateless/02115_map_contains.reference | 2 +- .../02115_map_contains_analyzer.reference | 4 ++ .../02115_map_contains_analyzer.sql | 13 ++++ .../0_stateless/02116_tuple_element.reference | 10 +-- .../02116_tuple_element_analyzer.reference | 25 ++++++++ .../02116_tuple_element_analyzer.sql | 43 +++++++++++++ 
...tions_to_subcolumns_column_names.reference | 14 +++++ ...1_functions_to_subcolumns_column_names.sql | 19 ++++++ ...2971_functions_to_subcolumns_map.reference | 8 +++ .../02971_functions_to_subcolumns_map.sql | 19 ++++++ 29 files changed, 411 insertions(+), 207 deletions(-) create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.reference create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.sql create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.reference create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index baecb372c2d..b8a477b8523 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -165,31 +166,17 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); + + resolveAggregateFunctionNodeByName( + aggregate_function_clone_typed, + result_aggregate_function_name, + {arithmetic_function_clone_argument->getResultType()}); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); - resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); + resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); return arithmetic_function_clone; } - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) - { - auto function_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get( - aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 7c38ba81c70..42b53f667b4 100644 
--- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -171,13 +172,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); result_function = std::move(not_function); } @@ -197,17 +198,11 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); return comparison_function; } - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - ContextPtr context; }; diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 07a031fe4e8..a73ca4befcf 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -77,11 +78,9 @@ public: /// Replace `countDistinct` of initial query into `count` auto result_type = function_node->getResultType(); - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9aa785d5918..ac13a505a52 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -178,12 +179,12 @@ private: } else if (column_type.isNullable()) { - if (function_name == "isNull" || function_name == "isNotNull") + if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") ++data.optimized_identifiers_count[qualified_name]; } else if (column_type.isMap()) { - if (function_name == "mapKeys" || function_name == "mapValues") + if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") ++data.optimized_identifiers_count[qualified_name]; } } @@ -192,10 +193,10 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { 
- const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + const auto & constant_value = second_argument_constant_node->getValue(); + const auto & constant_value_type = constant_value.getType(); - if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) ++data.optimized_identifiers_count[qualified_name]; } else if (function_name == "mapContains" && column_type.isMap()) @@ -209,6 +210,9 @@ private: /// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { +private: + std::unordered_set identifiers_to_optimize; + public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; @@ -262,7 +266,7 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "equals"); + resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); } else if (function_name == "notEmpty") { @@ -274,12 +278,27 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "notEquals"); + resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); } } else if (column_type.isNullable()) { - if (function_name == "isNull") + if (function_name == "count") + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + column.name += ".null"; + column.type = std::make_shared(); + + auto column_node = std::make_shared(column, column_source); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + } + else if (function_name == "isNull") { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; @@ -295,12 +314,20 @@ public: function_arguments_nodes = {std::make_shared(column, column_source)}; - resolveOrdinaryFunctionNode(*function_node, "not"); + resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); } } else if (column_type.isMap()) { - if (function_name == "mapKeys") + if (function_name == "length") + { + /// Replace `length(map_argument)` with `map_argument.size0` + column.name += ".size0"; + column.type = std::make_shared(); + + node = std::make_shared(column, column_source); + } + else if (function_name == "mapKeys") { /// Replace `mapKeys(map_argument)` with `map_argument.keys` column.name += ".keys"; @@ -364,19 +391,10 @@ public: auto has_function_argument = std::make_shared(column, column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNode(*function_node, "has"); + resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); } } } - -private: - std::unordered_set identifiers_to_optimize; - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & 
function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6b801925a6e..6d9e6765608 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -41,25 +42,17 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } -private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 9b9ceacdd4c..cc6fe95101d 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -140,16 +141,16 @@ private: const auto lhs = std::make_shared("greaterOrEquals"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = std::make_shared("less"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("and"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -158,16 +159,16 @@ private: const auto lhs = std::make_shared("less"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = 
std::make_shared("greaterOrEquals"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("or"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -176,7 +177,7 @@ private: const auto new_date_filter = std::make_shared("greaterOrEquals"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -185,7 +186,7 @@ private: const auto new_date_filter = std::make_shared("less"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -194,7 +195,7 @@ private: const auto new_date_filter = std::make_shared(comparator); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -205,12 +206,6 @@ private: comparator); } } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 9c89670f3c6..b8962e5a4c1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -58,8 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } else if (first_const_node) @@ -79,30 +79,21 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = 
std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) { auto result_type = function_node.getResultType(); + auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; - std::string suffix = "If"; - if (result_type->isNullable()) - suffix = "OrNullIf"; - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + resolveAggregateFunctionNodeByName( + function_node, function_node.getFunctionName() + suffix, - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); + {arguments[0]->getResultType(), arguments[1]->getResultType()}); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 7887a1b7175..2f6674946a3 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -77,50 +78,30 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*lhs, column_type); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*rhs_count, column_type); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(rhs_count); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); if (column_id == 0) new_node->getArguments().getNodes() = {lhs, rhs}; else if (column_id == 1) new_node->getArguments().getNodes() = {rhs, lhs}; - resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext()); if (!new_node) return; node = new_node; - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - const auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), - 
NullsAction::EMPTY, - {argument_type}, - {}, - properties); - - function_node.resolveAsAggregateFunction(aggregate_function); - } - }; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index c6b1c6eb851..78d5479843e 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -65,7 +66,8 @@ public: auto multiplier_node = function_node_arguments_nodes[0]; function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (constant_value_literal.get() != 1) { @@ -113,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_true_condition_value != 1) { @@ -142,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_false_condition_value != 1) { @@ -154,14 +156,6 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index a8382930506..610128a5754 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB @@ -75,15 +76,7 @@ public: for (const auto & function_node_argument : function_node_argument_nodes) argument_types.emplace_back(function_node_argument->getResultType()); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node->getFunctionName(), - NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), - properties); - - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); } }; diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 11ebc45a369..d5e4e011cfa 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -175,11 +176,8 @@ 
public: /// Replace uniq of initial query to count if (match_subquery_with_distinct() || match_subquery_with_group_by()) { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - function_node->getArguments().getNodes().clear(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 53fcf534f64..c193619a35f 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -685,4 +685,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context) +{ + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); +} + +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +{ + chassert(function_node.isAggregateFunction()); + auto old_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get( + function_name, + function_node.getNullsAction(), + argument_types, + old_aggregate_function->getParameters(), + properties); + + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index d3eb6ba3cc2..60f32d6b267 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -102,4 +102,12 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Resolves function node as ordinary function with given name. +/// Arguments and parameters are taken from the node. +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context); + +/// Resolves function node as aggregate function with given name. +/// Arguments and parameters are taken from the node. 
+void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); + } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..b99506e948e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -43,6 +43,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -170,6 +171,7 @@ struct QueryASTSettings struct QueryTreeSettings { bool run_passes = true; + bool dump_tree = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; @@ -179,6 +181,7 @@ struct QueryTreeSettings std::unordered_map> boolean_settings = { {"run_passes", run_passes}, + {"dump_tree", dump_tree}, {"dump_passes", dump_passes}, {"dump_ast", dump_ast} }; @@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); auto settings = checkAndGetSettings(ast.getSettings()); + if (!settings.dump_tree && !settings.dump_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query"); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + bool need_newline = false; if (settings.run_passes) { @@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (settings.dump_passes) { query_tree_pass_manager.dump(buf, pass_index); - if (pass_index > 0) - buf << '\n'; + need_newline = true; } query_tree_pass_manager.run(query_tree, pass_index); + } + + if (settings.dump_tree) + { + if (need_newline) + buf << "\n\n"; query_tree->dumpTree(buf); - } - else - { - query_tree->dumpTree(buf); + need_newline = true; } if (settings.dump_ast) { - buf << '\n'; - buf << '\n'; + if (need_newline) + buf << "\n\n"; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index eaf27a7ae80..3167c2d37dc 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -40,27 +40,16 @@ ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const S return makeASTFunction("sum", makeASTFunction("not", ast)); } -ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg) +const std::unordered_map, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn = { - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("has", ast, arg); -} - -const std::unordered_map> unary_function_to_subcolumn = -{ - {"length", {TypeIndex::Array, "size0", transformToSubcolumn}}, - {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}}, - {"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}}, - {"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}}, - {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}}, -}; - -const std::unordered_map> 
binary_function_to_subcolumn -{ - {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}}, + {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}}, + {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}}, + {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}}, + {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}}, + {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}}, + {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}}, + {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}}, + {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}}, }; std::optional getColumnFromArgumentsToOptimize( @@ -116,10 +105,14 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & expected_types_id = std::get<0>(it->second); + if (expected_types_id.contains(column_type_id)) ++data.optimized_identifiers_count[column->name]; } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -131,11 +124,9 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) - ++data.optimized_identifiers_count[column->name]; + ++data.optimized_identifiers_count[column->name]; } } } @@ -148,7 +139,7 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST return; auto column_type_id = column->type->getTypeId(); - const auto & alias = function.tryGetAlias(); + auto alias = function.getAliasOrColumnName(); if (arguments.size() == 1) { @@ -156,8 +147,8 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST if (it == unary_function_to_subcolumn.end()) return; - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) + const auto & [expected_types_id, subcolumn_name, transformer] = it->second; + if (!expected_types_id.contains(column_type_id)) return; ast = transformer(column->name, subcolumn_name); @@ -191,17 +182,10 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it == binary_function_to_subcolumn.end()) - return; - - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) - return; - - ast = transformer(column->name, subcolumn_name, arguments[1]); + auto subcolumn = transformToSubcolumn(column->name, "keys"); + ast = makeASTFunction("has", subcolumn, arguments[1]); ast->setAlias(alias); } } diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference 
b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference index a1cd31e2dc9..8c4017d6030 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference @@ -2,25 +2,25 @@ 0 1 0 SELECT id IS NULL, - `n.null`, - NOT `n.null` + `n.null` AS `isNull(n)`, + NOT `n.null` AS `isNotNull(n)` FROM t_func_to_subcolumns 3 0 1 0 0 1 0 \N SELECT - `arr.size0`, - `arr.size0` = 0, - `arr.size0` != 0, + `arr.size0` AS `length(arr)`, + `arr.size0` = 0 AS `empty(arr)`, + `arr.size0` != 0 AS `notEmpty(arr)`, empty(n) FROM t_func_to_subcolumns ['foo','bar'] [1,2] [] [] SELECT - `m.keys`, - `m.values` + `m.keys` AS `mapKeys(m)`, + `m.values` AS `mapValues(m)` FROM t_func_to_subcolumns 1 -SELECT sum(NOT `n.null`) +SELECT sum(NOT `n.null`) AS `count(n)` FROM t_func_to_subcolumns 2 SELECT count(id) @@ -30,7 +30,7 @@ FROM t_func_to_subcolumns 3 0 0 SELECT id, - `n.null`, + `n.null` AS `isNull(n)`, right.n IS NULL FROM t_func_to_subcolumns AS left ALL FULL OUTER JOIN diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql index eb0165f4e13..45f83bf20e5 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql @@ -1,6 +1,5 @@ DROP TABLE IF EXISTS t_func_to_subcolumns; -SET allow_experimental_map_type = 1; SET optimize_functions_to_subcolumns = 1; CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference new file mode 100644 index 00000000000..ce5e46fa271 --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -0,0 +1,50 @@ +0 0 1 +0 1 0 +SELECT + __table1.id IS NULL AS `isNull(id)`, + __table1.`n.null` AS `isNull(n)`, + NOT __table1.`n.null` AS `isNotNull(n)` +FROM default.t_func_to_subcolumns AS __table1 +3 0 1 0 +0 1 0 \N +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`arr.size0` = 0 AS `empty(arr)`, + __table1.`arr.size0` != 0 AS `notEmpty(arr)`, + empty(__table1.n) AS `empty(n)` +FROM default.t_func_to_subcolumns AS __table1 +['foo','bar'] [1,2] +[] [] +SELECT + __table1.`m.keys` AS `mapKeys(m)`, + __table1.`m.values` AS `mapValues(m)` +FROM default.t_func_to_subcolumns AS __table1 +1 +SELECT sum(NOT __table1.`n.null`) AS `count(n)` +FROM default.t_func_to_subcolumns AS __table1 +2 +SELECT count(__table1.id) AS `count(id)` +FROM default.t_func_to_subcolumns AS __table1 +1 0 0 +2 1 0 +3 0 0 +SELECT + __table1.id AS id, + __table1.`n.null` AS `isNull(n)`, + __table2.n IS NULL AS `isNull(right.n)` +FROM default.t_func_to_subcolumns AS __table1 +ALL FULL OUTER JOIN +( + + SELECT + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + UNION ALL + SELECT + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 +) AS __table2 USING (id) +0 10 +0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql new file mode 100644 index 00000000000..c1ab6909e2f --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns; + +SET allow_experimental_analyzer = 1; +SET optimize_functions_to_subcolumns = 1; + +CREATE TABLE 
t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; + +SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; + +SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; + +SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; + +SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; + +SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +DROP TABLE t_func_to_subcolumns; + +DROP TABLE IF EXISTS t_tuple_null; + +CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_tuple_null VALUES ((10)), ((20)); + +SELECT t IS NULL, t.null FROM t_tuple_null; + +DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference index 975e9876237..e4ae4f951ba 100644 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ b/tests/queries/0_stateless/02115_map_contains.reference @@ -1,4 +1,4 @@ -SELECT has(`m.keys`, \'a\') +SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` FROM t_map_contains 1 0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.reference b/tests/queries/0_stateless/02115_map_contains_analyzer.reference new file mode 100644 index 00000000000..7da5243e727 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.reference @@ -0,0 +1,4 @@ +SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')` +FROM default.t_map_contains AS __table1 +1 +0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.sql b/tests/queries/0_stateless/02115_map_contains_analyzer.sql new file mode 100644 index 00000000000..46e02eca4f0 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_map_contains; + +CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; + +INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains; +SELECT mapContains(m, 'a') FROM t_map_contains; + +DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference index 121b08d02f1..a8004f5e74c 100644 --- a/tests/queries/0_stateless/02116_tuple_element.reference 
+++ b/tests/queries/0_stateless/02116_tuple_element.reference @@ -1,17 +1,17 @@ 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, 1)` FROM t_tuple_element a -SELECT `t1.s` +SELECT `t1.s` AS `tupleElement(t1, 2)` FROM t_tuple_element 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, \'a\')` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 1 2 WITH (1, 2) AS t diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference new file mode 100644 index 00000000000..d30f3a6cc58 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -0,0 +1,25 @@ +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, 1)` +FROM default.t_tuple_element AS __table1 +a +SELECT __table1.`t1.s` AS `tupleElement(t1, 2)` +FROM default.t_tuple_element AS __table1 +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +1 2 +SELECT + 1 AS `tupleElement(t, 1)`, + 2 AS `tupleElement(t, 2)` +FROM system.one AS __table1 +1 2 +SELECT + _CAST(1, \'UInt32\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')` +FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.sql b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql new file mode 100644 index 00000000000..5aeb72c9ee4 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_tuple_element; + +CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +SELECT t1.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element; + +SELECT tupleElement(t1, 2) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element; + +SELECT tupleElement(t1, 'a') FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element; + +SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT t2.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element; + +SELECT tupleElement(t2, 1) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element; + +SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t2, 'a') FROM 
t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +DROP TABLE t_tuple_element; + +WITH (1, 2) AS t SELECT t.1, t.2; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2; + +WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference new file mode 100644 index 00000000000..4787c660c68 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -0,0 +1,14 @@ +SELECT + `arr.size0` AS `length(arr)`, + `n.null` AS `isNull(n)` +FROM t_column_names +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 │ +└─────────────┴───────────┘ +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`n.null` AS `isNull(n)` +FROM default.t_column_names AS __table1 +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 │ +└─────────────┴───────────┘ diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql new file mode 100644 index 00000000000..89c39046df3 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_column_names; + +CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory; + +INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +DROP TABLE t_column_names; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference new file mode 100644 index 00000000000..90596ce1000 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -0,0 +1,8 @@ +SELECT `m.size0` AS `length(m)` +FROM t_func_to_subcolumns_map +2 +1 +SELECT __table1.`m.size0` AS `length(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +2 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql new file mode 100644 index 00000000000..b5687696b43 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_map; + +CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SET 
optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +DROP TABLE t_func_to_subcolumns_map; From 368c99f1827acabd32a1ec11dba8711c627cdd53 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 23 Jan 2024 02:46:18 +0000 Subject: [PATCH 010/439] fix crash with analyzer --- ...egateFunctionsArithmericOperationsPass.cpp | 7 ++--- src/Analyzer/Passes/CountDistinctPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 4 +-- .../RewriteAggregateFunctionWithIfPass.cpp | 12 +++------ .../RewriteSumFunctionWithSumAndCountPass.cpp | 4 +-- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 10 +------ src/Analyzer/Passes/UniqToCountPass.cpp | 2 +- src/Analyzer/Utils.cpp | 26 ++++++------------- src/Analyzer/Utils.h | 2 +- 11 files changed, 26 insertions(+), 51 deletions(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index b8a477b8523..a3d3b0ca13a 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -165,12 +165,9 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); - aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNodeByName( - aggregate_function_clone_typed, - result_aggregate_function_name, - {arithmetic_function_clone_argument->getResultType()}); + aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; + resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index a73ca4befcf..45d0301a0fe 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -80,7 +80,7 @@ public: auto result_type = function_node->getResultType(); function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index ac13a505a52..de8b7753700 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -296,7 +296,7 @@ public: resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + resolveAggregateFunctionNodeByName(*function_node, "sum"); } else if (function_name == "isNull") { 
diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6d9e6765608..1810158a2d7 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -43,14 +43,14 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index b8962e5a4c1..37eb3d98614 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -59,7 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } else if (first_const_node) @@ -79,21 +79,17 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) + static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); auto suffix = result_type->isNullable() ? 
"OrNullIf" : "If"; - - resolveAggregateFunctionNodeByName( - function_node, - function_node.getFunctionName() + suffix, - {arguments[0]->getResultType(), arguments[1]->getResultType()}); + resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 2f6674946a3..39f9e3b625b 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -78,11 +78,11 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName()); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName()); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 78d5479843e..e072ba5ad48 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -67,7 +67,7 @@ public: function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (constant_value_literal.get() != 1) { @@ -115,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_true_condition_value != 1) { @@ -144,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_false_condition_value != 1) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 610128a5754..1339fc07ac8 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -68,15 +68,7 @@ public: if (!replaced_argument) return; - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); - - DataTypes argument_types; - argument_types.reserve(function_node_argument_nodes.size()); - - for (const auto & function_node_argument : function_node_argument_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); - - resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName()); } }; diff 
--git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d5e4e011cfa..929c2731e5d 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -177,7 +177,7 @@ public: if (match_subquery_with_distinct() || match_subquery_with_group_by()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index c193619a35f..efada8ef16a 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -528,16 +528,16 @@ private: bool has_function = false; }; -inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) +inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name) { Array parameters; - for (const auto & param : function_node->getParameters()) + for (const auto & param : function_node.getParameters()) { auto * constant = param->as(); parameters.push_back(constant->getValue()); } - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + const auto & function_node_argument_nodes = function_node.getArguments().getNodes(); DataTypes argument_types; argument_types.reserve(function_node_argument_nodes.size()); @@ -547,7 +547,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod AggregateFunctionProperties properties; auto action = NullsAction::EMPTY; - return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); + return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties); } } @@ -628,11 +628,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context) { if (name == "nothing") return; - function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } else if (function_node->isWindowFunction()) { - function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } } @@ -691,19 +691,9 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } -void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name) { - chassert(function_node.isAggregateFunction()); - auto old_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_name, - function_node.getNullsAction(), - argument_types, - old_aggregate_function->getParameters(), - properties); - + auto aggregate_function = resolveAggregateFunction(function_node, function_name); function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 60f32d6b267..75d874c1736 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -108,6 +108,6 @@ void 
resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Resolves function node as aggregate function with given name. /// Arguments and parameters are taken from the node. -void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); } From d09f5d18f16c7e988338531be2432187ce633891 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:14:38 +0100 Subject: [PATCH 011/439] Repro test --- .../02967_prewhere_no_columns.reference | 2 + .../0_stateless/02967_prewhere_no_columns.sql | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.reference create mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.sql diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference new file mode 100644 index 00000000000..df105254618 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.reference @@ -0,0 +1,2 @@ +105 +105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql b/tests/queries/0_stateless/02967_prewhere_no_columns.sql new file mode 100644 index 00000000000..efcc952caa2 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.sql @@ -0,0 +1,51 @@ +CREATE TABLE t_02967 +( + `key` Date, + `value` UInt16 +) +ENGINE = MergeTree +ORDER BY key +SETTINGS + index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 + , index_granularity = 100 + , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 +-- +-- , min_bytes_for_wide_part = 2 +AS SELECT + number, + repeat(toString(number), 5) +FROM numbers(105.); + + + +-- Check with newly inserted data part. It's in-memory structured are filled at insert time. +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + + + +-- Reload part form disk to check that in-meory structures where properly serilaized-deserialized +DETACH TABLE t_02967; +ATTACH TABLE t_02967; + + +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + +DROP TABLE t_02967; From a11a5e783b0089a943f96ce58fedcf15bb586fc0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:15:46 +0100 Subject: [PATCH 012/439] Adjust last granule after reading row count --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87f23b0da2a..d7221f5a536 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1176,6 +1176,14 @@ void IMergeTreeDataPart::loadRowsCount() auto buf = metadata_manager->read("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); + + if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) + { + /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. 
+ index_granularity.popMark(); + index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); + chassert(rows_count == index_granularity.getTotalRows()); + } }; if (index_granularity.empty()) From 367a874edd25ffe4c9cfc1e2105e9fad899c68bd Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:16:15 +0100 Subject: [PATCH 013/439] Adjust last granule when creating part --- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9d373504473..a670807a997 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -760,7 +760,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// We can adjust marks only if we computed granularity for blocks. /// Otherwise we cannot change granularity because it will differ from /// other columns - if (compute_granularity && settings.can_use_adaptive_granularity) +// if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) throw Exception(ErrorCodes::LOGICAL_ERROR, From 502b8239a219bdace74e1de32e337c97024c2bee Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 25 Jan 2024 13:40:02 +0100 Subject: [PATCH 014/439] Allow last mark not to match fixed granularity value --- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a670807a997..bb60e682f1b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -535,7 +535,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - throw Exception( + /// With fixed granularity we can have last mark with less rows than granularity + const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); + if (!data_part->index_granularity_info.fixed_index_granularity || !is_last_mark) + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", From ca2bfdb9aec54bb0bf3c983bd0c62fcf2c48c1dd Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 25 Jan 2024 13:41:49 +0100 Subject: [PATCH 015/439] Update according to last mark smaller size --- tests/queries/1_stateful/00166_explain_estimate.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 71ddd681581..85ecd0b9a71 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8839168 1079 -test hits 1 835584 102 +test hits 1 8832938 1079 +test hits 1 829354 102 test hits 1 8003584 977 test hits 2 581632 71 From 02b349822f10c1026ee1bf1b3b0d4f213864d929 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 29 Jan 2024 19:03:34 +0100 Subject: [PATCH 016/439] Fix for last mark equal to 
fixed granule size --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d7221f5a536..8ebe39a916e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1182,7 +1182,10 @@ void IMergeTreeDataPart::loadRowsCount() /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. index_granularity.popMark(); index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); - chassert(rows_count == index_granularity.getTotalRows()); + if (rows_count != index_granularity.getTotalRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", + name, index_granularity.getTotalRows(), rows_count); } }; From 7d62e224b50d10188b9876684a8a564b5965347c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 5 Feb 2024 15:31:42 +0100 Subject: [PATCH 017/439] Pass correct Context --- .../Passes/ComparisonTupleEliminationPass.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 42b53f667b4..88da37f014b 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -19,19 +19,18 @@ namespace DB namespace { -class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor +class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext { public: - explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) - : context(std::move(context_)) - {} + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * function_node = node->as(); if (!function_node) @@ -172,13 +171,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext()); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext()); result_function = std::move(not_function); } @@ -198,12 +197,10 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext()); return comparison_function; } - - ContextPtr context; }; } From 
bac29c0bbafb7a97e033ca08bdc12a9d13914c15 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 15:52:19 +0000 Subject: [PATCH 018/439] add test for variant subcolumn --- .../RewriteFunctionToSubcolumnVisitor.cpp | 6 ++++++ ..._functions_to_subcolumns_variant.reference | 8 +++++++ .../02971_functions_to_subcolumns_variant.sql | 21 +++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 04451947796..437d46c24b2 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -124,6 +124,12 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } + else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) + { + const auto * literal = arguments[1]->as(); + if (literal && literal->value.getType() == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { ++data.optimized_identifiers_count[column->name]; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference new file mode 100644 index 00000000000..7a52155fc2d --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -0,0 +1,8 @@ +SELECT `v.String` AS `variantElement(v, \'String\')` +FROM t_func_to_subcolumns_variant +foo +\N +SELECT __table1.`v.String` AS `variantElement(v, \'String\')` +FROM default.t_func_to_subcolumns_variant AS __table1 +foo +\N diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql new file mode 100644 index 00000000000..1cedd877289 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_variant; + +SET allow_experimental_variant_type = 1; + +CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +DROP TABLE t_func_to_subcolumns_variant; From 46f6867896acec7ee52a25a1e215e78a1eb9365d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 17:37:02 +0000 Subject: [PATCH 019/439] refactor FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 399 ++++++++---------- ...2971_functions_to_subcolumns_map.reference | 16 + .../02971_functions_to_subcolumns_map.sql | 12 + 3 files 
changed, 210 insertions(+), 217 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 877b0ef7232..f0392a0d9d4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -25,6 +26,181 @@ namespace DB namespace { +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) +{ + /// Replace `length(argument)` with `argument.size0` + /// `argument` may be Array or Map. + + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +template +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +{ + /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive + /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive + /// `argument` may be Array or Map. + + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + auto function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + if (value.getType() == Field::Types::UInt64) + return data_type_tuple.getNameByPosition(value.get()); + + return ""; +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + return ""; +} + +template +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +{ + /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. + /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. + + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + if (function_arguments_nodes.size() != 2) + return; + + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (!second_argument_constant_node) + return; + + auto column_type = column_node.getColumnType(); + const auto & data_type_concrete = assert_cast(*column_type); + + auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) + return; + + NameAndTypePair column{column_node.getColumnName() + "." 
+ subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +using NodeToSubcolumnTransformer = std::function; + +std::map, NodeToSubcolumnTransformer> node_transformers = +{ + { + {TypeIndex::Array, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Array, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Map, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "mapKeys"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapValues"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapContains"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + auto column_type = column_node.getColumnType(); + const auto & data_type_map = assert_cast(*column_type); + + NameAndTypePair column{column_node.getColumnName() + ".keys", std::make_shared(data_type_map.getKeyType())}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNodeByName(function_node, "has", context); + }, + }, + { + {TypeIndex::Nullable, "count"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(function_node, "sum"); + }, + }, + { + {TypeIndex::Nullable, "isNull"}, + [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Nullable, "isNotNull"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + 
NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; + resolveOrdinaryFunctionNodeByName(function_node, "not", context); + }, + }, + { + {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement, + }, + { + {TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement, + }, +}; + std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) { auto * function_node = node->as(); @@ -161,54 +337,13 @@ private: void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { - const auto & function_arguments_nodes = function_node.getArguments().getNodes(); - const auto & function_name = function_node.getFunctionName(); - auto column = first_argument_column_node.getColumn(); - WhichDataType column_type(column.type); - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isNullable()) - { - if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isMap()) - { - if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") - ++data.optimized_identifiers_count[qualified_name]; - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - const auto & constant_value = second_argument_constant_node->getValue(); - const auto & constant_value_type = constant_value.getType(); - - if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "variantElement" && column_type.isVariant() && second_argument_constant_node) - { - if (second_argument_constant_node->getValue().getType() == Field::Types::String) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "mapContains" && column_type.isMap()) - { - ++data.optimized_identifiers_count[qualified_name]; - } - } + if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) + ++data.optimized_identifiers_count[qualified_name]; } }; @@ -236,9 +371,6 @@ public: if (!function_node || !first_argument_column_node || !table_node) return; - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - const auto & function_name = function_node->getFunctionName(); - auto column = first_argument_column_node->getColumn(); auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); @@ -246,176 +378,9 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length") - { - /// Replace 
`length(array_argument)` with `array_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "empty") - { - /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); - } - else if (function_name == "notEmpty") - { - /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); - } - } - else if (column_type.isNullable()) - { - if (function_name == "count") - { - /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - column.name += ".null"; - column.type = std::make_shared(); - - auto column_node = std::make_shared(column, column_source); - auto function_node_not = std::make_shared("not"); - - function_node_not->getArguments().getNodes().push_back(std::move(column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); - - function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum"); - } - else if (function_name == "isNull") - { - /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - column.name += ".null"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "isNotNull") - { - /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - column.name += ".null"; - column.type = std::make_shared(); - - function_arguments_nodes = {std::make_shared(column, column_source)}; - - resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); - } - } - else if (column_type.isMap()) - { - if (function_name == "length") - { - /// Replace `length(map_argument)` with `map_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapKeys") - { - /// Replace `mapKeys(map_argument)` with `map_argument.keys` - column.name += ".keys"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapValues") - { - /// Replace `mapValues(map_argument)` with `map_argument.values` - column.name += ".values"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. 
- */ - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); - - const auto & data_type_tuple = assert_cast(*column.type); - - String subcolumn_name; - - if (tuple_element_constant_value_type == Field::Types::String) - { - subcolumn_name = tuple_element_constant_value.get(); - } - else if (tuple_element_constant_value_type == Field::Types::UInt64) - { - auto tuple_column_index = tuple_element_constant_value.get(); - subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); - } - else - { - return; - } - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node) - { - /// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`. - const auto & variant_element_constant_value = second_argument_constant_node->getValue(); - String subcolumn_name; - - if (variant_element_constant_value.getType() != Field::Types::String) - return; - - subcolumn_name = variant_element_constant_value.get(); - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapContains" && column_type.isMap()) - { - const auto & data_type_map = assert_cast(*column.type); - - /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - column.name += ".keys"; - column.type = std::make_shared(data_type_map.getKeyType()); - - auto has_function_argument = std::make_shared(column, column_source); - function_arguments_nodes[0] = std::move(has_function_argument); - - resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); - } - } + auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) + transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); } }; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 90596ce1000..50f21842ac1 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -2,7 +2,23 @@ SELECT `m.size0` AS `length(m)` FROM t_func_to_subcolumns_map 2 1 +SELECT `m.size0` = 0 AS `empty(m)` +FROM t_func_to_subcolumns_map +0 +0 +SELECT `m.size0` != 0 AS `notEmpty(m)` +FROM t_func_to_subcolumns_map +1 +1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 1 +SELECT __table1.`m.size0` = 0 AS `empty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +0 +0 +SELECT __table1.`m.size0` != 0 AS `notEmpty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +1 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index b5687696b43..c574e1033c0 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -10,10 +10,22 @@ SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; SELECT 
length(m) FROM t_func_to_subcolumns_map; +EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; SELECT length(m) FROM t_func_to_subcolumns_map; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + DROP TABLE t_func_to_subcolumns_map; From 361b5a20771b17305b7d11e626c6f9eba9b77fe3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Feb 2024 18:19:15 +0000 Subject: [PATCH 020/439] more refactoring of FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 86 ++++++++++--------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index f0392a0d9d4..954ae6df13e 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -26,31 +26,40 @@ namespace DB namespace { -void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) +struct ColumnContext +{ + NameAndTypePair column; + QueryTreeNodePtr column_source; + ContextPtr context; +}; + +using NodeToSubcolumnTransformer = std::function; + +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `length(argument)` with `argument.size0` /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); } template -void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); auto function_name = positive ? 
"equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -73,7 +82,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) } template -void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. @@ -86,19 +95,16 @@ void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & funct if (!second_argument_constant_node) return; - auto column_type = column_node.getColumnType(); - const auto & data_type_concrete = assert_cast(*column_type); - + const auto & data_type_concrete = assert_cast(*ctx.column.type); auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) return; - NameAndTypePair column{column_node.getColumnName() + "." + subcolumn_name, function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); } -using NodeToSubcolumnTransformer = std::function; - std::map, NodeToSubcolumnTransformer> node_transformers = { { @@ -121,52 +127,51 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Map, "mapKeys"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapKeys(map_argument)` with `map_argument.keys` - NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapValues"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapValues(map_argument)` with `map_argument.values` - NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapContains"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - auto column_type = column_node.getColumnType(); - const auto & data_type_map = assert_cast(*column_type); + const auto & data_type_map = assert_cast(*ctx.column.type); - NameAndTypePair 
column{column_node.getColumnName() + ".keys", std::make_shared(data_type_map.getKeyType())}; + NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + auto has_function_argument = std::make_shared(column, ctx.column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNodeByName(function_node, "has", context); + resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context); }, }, { {TypeIndex::Nullable, "count"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto new_column_node = std::make_shared(column, ctx.column_source); auto function_node_not = std::make_shared("not"); function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context); function_arguments_nodes = {std::move(function_node_not)}; resolveAggregateFunctionNodeByName(function_node, "sum"); @@ -174,23 +179,23 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Nullable, "isNull"}, - [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Nullable, "isNotNull"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; - resolveOrdinaryFunctionNodeByName(function_node, "not", context); + function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; + resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context); }, }, { @@ -380,7 +385,10 @@ public: auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); if (transformer_it != node_transformers.end()) - transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); + { + ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), 
getContext()}; + transformer_it->second(node, *function_node, ctx); + } } }; From 1da258bfda0fcae33451efd670e8486910052c40 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:35:03 +0000 Subject: [PATCH 021/439] better functions to subcolumns optimization --- .../Passes/FunctionToSubcolumnsPass.cpp | 96 ++++++----- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- ...functions_to_subcolumns_analyzer.reference | 149 +++++++++++++++++- ...01872_functions_to_subcolumns_analyzer.sql | 12 +- ...03_functions_to_subcolumns_final.reference | 25 +++ .../03003_functions_to_subcolumns_final.sql | 23 +++ 6 files changed, 242 insertions(+), 65 deletions(-) create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 4ebcd59d8ec..8ba33a50ccf 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -58,8 +58,8 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - auto function_name = positive ? "equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); + const auto * function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -246,24 +246,11 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - struct Data - { - bool has_final = false; - std::unordered_set all_key_columns; - std::unordered_map indentifiers_count; - std::unordered_map optimized_identifiers_count; - }; - - Data getData() const { return data; } - void enterImpl(const QueryTreeNodePtr & node) { if (!getSettings().optimize_functions_to_subcolumns) return; - if (data.has_final) - return; - if (auto * table_node = node->as()) { enterImpl(*table_node); @@ -284,18 +271,45 @@ public: } } + std::unordered_set getIdentifiersToOptimize() const + { + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. 
+ + std::unordered_set identifiers_to_optimize; + for (const auto & [identifier, count] : optimized_identifiers_count) + { + if (all_key_columns.contains(identifier)) + continue; + + auto it = identifiers_count.find(identifier); + if (it != identifiers_count.end() && it->second == count) + identifiers_to_optimize.insert(identifier); + } + + return identifiers_to_optimize; + } + private: - Data data; + std::unordered_set all_key_columns; + std::unordered_map identifiers_count; + std::unordered_map optimized_identifiers_count; NameSet processed_tables; void enterImpl(const TableNode & table_node) { - if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) - { - data.has_final = true; - return; - } - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); if (processed_tables.emplace(table_name).second) return; @@ -305,7 +319,7 @@ private: for (const auto & column_name : key_columns) { Identifier identifier({table_name, column_name}); - data.all_key_columns.insert(identifier); + all_key_columns.insert(identifier); } }; @@ -337,18 +351,23 @@ private: auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column_node.getColumnName()}); - ++data.indentifiers_count[qualified_name]; + ++identifiers_count[qualified_name]; } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + return; + auto column = first_argument_column_node.getColumn(); auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column.name}); if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) - ++data.optimized_identifiers_count[qualified_name]; + ++optimized_identifiers_count[qualified_name]; } }; @@ -398,32 +417,7 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPt { FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); - auto data = first_visitor.getData(); - - /// For queries with FINAL converting function to subcolumn may alter - /// special merging algorithms and produce wrong result of query. - if (data.has_final) - return; - - /// Do not optimize if full column is requested in other context. - /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - std::unordered_set identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); + auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize(); if (identifiers_to_optimize.empty()) return; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a8041b5b0a9..c73ff524d1f 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -88,7 +88,7 @@ private: static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); - auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; + const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index ce5e46fa271..e409e9ad89f 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -1,5 +1,24 @@ 0 0 1 0 1 0 +QUERY id: 0 + PROJECTION COLUMNS + isNull(id) UInt8 + isNull(n) UInt8 + isNotNull(n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 + FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: n.null, result_type: UInt8, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.id IS NULL AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, @@ -7,6 +26,32 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 3 0 1 0 0 1 0 \N +QUERY id: 0 + PROJECTION COLUMNS + length(arr) UInt64 + empty(arr) UInt8 + notEmpty(arr) UInt8 + empty(n) Nullable(UInt8) + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: arr.size0, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 12, function_name: empty, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: n, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`arr.size0` = 0 AS `empty(arr)`, @@ -15,19 +60,106 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ['foo','bar'] [1,2] [] [] 
+QUERY id: 0 + PROJECTION COLUMNS + mapKeys(m) Array(String) + mapValues(m) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: m.keys, result_type: Array(String), source_id: 3 + COLUMN id: 4, column_name: m.values, result_type: Array(UInt64), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`m.keys` AS `mapKeys(m)`, __table1.`m.values` AS `mapValues(m)` FROM default.t_func_to_subcolumns AS __table1 1 +QUERY id: 0 + PROJECTION COLUMNS + count(n) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT sum(NOT __table1.`n.null`) AS `count(n)` FROM default.t_func_to_subcolumns AS __table1 2 +QUERY id: 0 + PROJECTION COLUMNS + count(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT count(__table1.id) AS `count(id)` FROM default.t_func_to_subcolumns AS __table1 1 0 0 2 1 0 3 0 0 +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + isNull(n) UInt8 + isNull(right.n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 + FUNCTION id: 5, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + JOIN TREE + JOIN id: 9, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + RIGHT TABLE EXPRESSION + UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + QUERIES + LIST id: 10, nodes: 2 + QUERY id: 11, alias: __table3 + PROJECTION COLUMNS + id UInt8 + n String + PROJECTION + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + JOIN TREE + TABLE id: 15, alias: __table4, table_name: system.one + QUERY id: 16, alias: __table5 + PROJECTION COLUMNS + id UInt8 + \'www\' String + PROJECTION + LIST id: 17, nodes: 2 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: \'www\', constant_value_type: String + JOIN TREE + TABLE id: 20, alias: __table6, table_name: system.one + JOIN EXPRESSION + LIST id: 21, nodes: 1 + COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + EXPRESSION + LIST id: 23, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 25, column_name: id, result_type: UInt8, source_id: 8 + SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, @@ -35,16 +167,19 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( - + ( SELECT - 1 AS id, - \'qqq\' AS n - FROM system.one AS __table4 + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + ) UNION ALL + ( SELECT - 3 AS id, - \'www\' AS `\'www\'` - 
FROM system.one AS __table6 + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 + ) ) AS __table2 USING (id) 0 10 0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql index c1ab6909e2f..b544f6829cf 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -9,24 +9,24 @@ ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); DROP TABLE t_func_to_subcolumns; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference new file mode 100644 index 00000000000..3051c199363 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference @@ -0,0 +1,25 @@ +3 +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 +) +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) +2 +SELECT length(__table1.arr) AS `length(arr)` +FROM default.t_length_1 AS __table1 +FINAL +WHERE length(__table1.arr) IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) diff --git 
a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql new file mode 100644 index 00000000000..5975347ad09 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t_length_1; +DROP TABLE IF EXISTS t_length_2; + +SET allow_experimental_analyzer = 1; +SET optimize_on_insert = 0; + +CREATE TABLE t_length_1 (id UInt64, arr Array(UInt64)) ENGINE = ReplacingMergeTree ORDER BY id; +CREATE TABLE t_length_2 (id UInt64, arr_length UInt64) ENGINE = ReplacingMergeTree ORDER BY id; + +INSERT INTO t_length_1 VALUES (1, [1, 2, 3]), (2, [4, 5]); +INSERT INTO t_length_2 VALUES (1, 3), (1, 2), (2, 2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +DROP TABLE t_length_1; +DROP TABLE t_length_2; From c0dd9b13aa09085a03b0e54b67ed4ff5c46f4336 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:39:39 +0000 Subject: [PATCH 022/439] update docs --- docs/en/operations/settings/settings.md | 8 ++++---- docs/ru/operations/settings/settings.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 622644a1543..2273aa8c472 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1776,7 +1776,7 @@ Default value: 0 (no restriction). ## insert_quorum {#insert_quorum} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables the quorum writes. @@ -1819,7 +1819,7 @@ See also: ## insert_quorum_parallel {#insert_quorum_parallel} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected. 
@@ -1840,7 +1840,7 @@ See also: ## select_sequential_consistency {#select_sequential_consistency} :::note -This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. +This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. ::: Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default). @@ -2504,7 +2504,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. -Default value: `0`. +Default value: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a56afda641b..f4eecc615b2 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2077,7 +2077,7 @@ SELECT * FROM test_table - 0 — оптимизация отключена. - 1 — оптимизация включена. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} @@ -2798,7 +2798,7 @@ SELECT TOP 3 name, value FROM system.settings; ``` ### output_format_pretty_color {#output_format_pretty_color} -Включает/выключает управляющие последовательности ANSI в форматах Pretty. +Включает/выключает управляющие последовательности ANSI в форматах Pretty. Возможные значения: @@ -4123,7 +4123,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан часовой пояс, будут интерпретированы как относящиеся к указанной зоне. -При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. +При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Функции `timeZone()` and `serverTimezone()` возвращают часовой пояс текущей сессии и сервера соответственно. 
From ed3c36debefe6c8a59f8482c75ef9f70c27c9bc3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 19:59:22 +0000 Subject: [PATCH 023/439] fix tests --- src/Core/SettingsChangesHistory.h | 3 +++ .../0_stateless/02116_tuple_element_analyzer.reference | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..afb9b201f50 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", { + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, + }}, {"24.2", { {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference index d30f3a6cc58..22d48ffb2f3 100644 --- a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -15,8 +15,8 @@ SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` FROM default.t_tuple_element AS __table1 1 2 SELECT - 1 AS `tupleElement(t, 1)`, - 2 AS `tupleElement(t, 2)` + _CAST(1, \'UInt8\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt8\') AS `tupleElement(t, 2)` FROM system.one AS __table1 1 2 SELECT From 7ac0ebbaca2c9032b04a6e77456e1c5b7f325f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 16:11:12 +0100 Subject: [PATCH 024/439] Test jeaiii itoa --- base/base/itoa.cpp | 522 +++++++++++++++++++++------------------------ 1 file changed, 241 insertions(+), 281 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index fd8fd8de025..4587d3e3e82 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -1,296 +1,256 @@ -// Based on https://github.com/amdn/itoa and combined with our optimizations -// -//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-// -// -// The MIT License (MIT) -// Copyright (c) 2016 Arturo Martin-de-Nicolas -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-//===----------------------------------------------------------------------===// - -#include -#include -#include #include #include #include #include -namespace +namespace jeaiii { -template -ALWAYS_INLINE inline constexpr T pow10(size_t x) -{ - return x ? 10 * pow10(x - 1) : 1; -} +/* + MIT License -// Division by a power of 10 is implemented using a multiplicative inverse. -// This strength reduction is also done by optimizing compilers, but -// presently the fastest results are produced by using the values -// for the multiplication and the shift as given by the algorithm -// described by Agner Fog in "Optimizing Subroutines in Assembly Language" -// -// http://www.agner.org/optimize/optimizing_assembly.pdf -// -// "Integer division by a constant (all processors) -// A floating point number can be divided by a constant by multiplying -// with the reciprocal. If we want to do the same with integers, we have -// to scale the reciprocal by 2n and then shift the product to the right -// by n. There are various algorithms for finding a suitable value of n -// and compensating for rounding errors. The algorithm described below -// was invented by Terje Mathisen, Norway, and not published elsewhere." + Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa -/// Division by constant is performed by: -/// 1. Adding 1 if needed; -/// 2. Multiplying by another constant; -/// 3. Shifting right by another constant. -template -struct Division -{ - static constexpr bool add{add_}; - static constexpr UInt multiplier{multiplier_}; - static constexpr unsigned shift{shift_}; -}; + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -/// Select a type with appropriate number of bytes from the list of types. -/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. -/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. -template -struct SelectType -{ - using Result = typename SelectType::Result; -}; + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. -template -struct SelectType<1, T, Ts...> -{ - using Result = T; -}; + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + using u32 = decltype(0xffffffff); + using u64 = decltype(0xffffffffffffffff); - -/// Division by 10^N where N is the size of the type. 
-template -using DivisionBy10PowN = typename SelectType< - N, - Division, /// divide by 10 - Division, /// divide by 100 - Division, /// divide by 10000 - Division /// divide by 100000000 - >::Result; - -template -using UnsignedOfSize = typename SelectType::Result; - -/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in -template -struct QuotientAndRemainder -{ - UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits - UnsignedOfSize remainder; // remainder with at most N decimal digits -}; - -template -QuotientAndRemainder inline split(UnsignedOfSize value) -{ - constexpr DivisionBy10PowN division; - - UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); - - return {quotient, remainder}; -} - -ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) -{ - *p = '0' + value; - ++p; - return p; -} - -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ - -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - -namespace convert -{ -template -char * head(char * p, UInt u); -template -char * tail(char * p, UInt u); - -//===----------------------------------------------------------===// -// head: find most significant digit, skip leading zeros -//===----------------------------------------------------------===// - -// "x" contains quotient and remainder after division by 10^N -// quotient is less than 10^N -template -ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder x) -{ - p = head(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * head(char * p, UInt u) -{ - return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * head, 1>(char * p, UnsignedOfSize<1> u) -{ - return u < 10 ? 
outDigit(p, u) : outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// tail: produce all digits including leading zeros -//===----------------------------------------------------------===// - -// recursive step, "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * tail(char * p, UInt u) -{ - QuotientAndRemainder x = split(u); - p = tail(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * tail, 1>(char * p, UnsignedOfSize<1> u) -{ - return outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// large values are >= 10^2*N -// where x contains quotient and remainder after division by 10^N -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder x) -{ - QuotientAndRemainder y = split(x.quotient); - p = head(p, UnsignedOfSize(y.quotient)); - p = tail(p, y.remainder); - p = tail(p, x.remainder); - return p; -} - -//===----------------------------------------------------------===// -// handle values of "u" that might be >= 10^2*N -// where N is the size of "u" in bytes -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * uitoa(char * p, UInt u) -{ - if (u < pow10>(N)) - return head(p, UnsignedOfSize(u)); - QuotientAndRemainder x = split(u); - - return u < pow10>(2 * N) ? head(p, x) : large(p, x); -} - -// selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) -{ - if (u < 10) - return outDigit(p, u); - else if (u < 100) - return outTwoDigits(p, u); - else + struct pair { - p = outDigit(p, u / 100); - p = outTwoDigits(p, u % 100); - return p; + char dd[2]; + constexpr pair(char c) : dd{ c, '\0' } { } + constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } + }; + + constexpr struct + { + pair dd[100] + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + pair fd[100] + { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + } + digits; + + constexpr u64 mask24 = (u64(1) << 24) - 1; + constexpr u64 mask32 = (u64(1) << 32) - 1; + constexpr u64 mask57 = (u64(1) << 57) - 1; + + template struct _cond { using type = F; }; + template struct _cond { using type = T; }; + template using cond = typename _cond::type; + + template + inline ALWAYS_INLINE + char* to_text_from_integer(char* b, T i) + { + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? 
*b++ = '-', U(0) - U(i) : U(i); + + if (n < u32(1e2)) + { + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; + } + if (n < u32(1e6)) + { + if (n < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; + } + auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < u64(1ull << 32ull)) + { + if (n < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; + } + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings + u32 z = n % u32(1e8); + u64 u = n / u32(1e8); + + if (u < u32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < u32(1e6)) + { + if (u < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + else + { + auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < u64(1ull << 32ull)) + { + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + u32 y = u % u32(1e8); + u /= u32(1e8); + + // u is 2, 3, or 4 digits (if 
u < 10 it would have been handled above) + if (u < u32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } } -//===----------------------------------------------------------===// -// handle unsigned and signed integral operands -//===----------------------------------------------------------===// - -// itoa: handle unsigned integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(U u, char * p) +namespace { - return convert::uitoa(p, u); -} - -// itoa: handle signed integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(I i, char * p) +ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - // Need "mask" to be filled with a copy of the sign bit. - // If "i" is a negative value, then the result of "operator >>" - // is implementation-defined, though usually it is an arithmetic - // right shift that replicates the sign bit. - // Use a conditional expression to be portable, - // a good optimizing compiler generates an arithmetic right shift - // and avoids the conditional branch. - UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; - // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. - // Cannot use std::abs() because the result is undefined - // in 2's complement systems for the most-negative value. - // Want to avoid conditional branch for performance reasons since - // CPU branch prediction will be ineffective when negative values - // occur randomly. - // Let "u" be "i" cast to unsigned type UnsignedOfSize. - // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. - // This yields the absolute value with the desired type without - // using a conditional branch and without invoking undefined or - // implementation defined behavior: - UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); - // Unconditionally store a minus sign when producing digits - // in a forward direction and increment the pointer only if - // the value is in fact negative. - // This avoids a conditional branch and is safe because we will - // always produce at least one digit and it will overwrite the - // minus sign when the value is not negative. 
- *p = '-'; - p += (mask & 1); - p = convert::uitoa(p, u); - return p; -} + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; @@ -301,7 +261,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) { /// If we the highest 64bit item is empty, we can print just the lowest item as u64 if (_x.items[UInt128::_impl::little(1)] == 0) - return convert::itoa(_x.items[UInt128::_impl::little(0)], p); + return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]); /// Doing operations using __int128 is faster and we already rely on this feature using T = unsigned __int128; @@ -332,7 +292,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) current_block += max_multiple_of_hundred_blocks; } - char * highest_part_print = convert::itoa(uint64_t(x), p); + char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x)); for (int i = 0; i < current_block; i++) { outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); @@ -448,12 +408,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) char * itoa(UInt8 i, char * p) { - return convert::itoa(uint8_t(i), p); + return jeaiii::to_text_from_integer(p, uint8_t(i)); } char * itoa(Int8 i, char * p) { - return convert::itoa(int8_t(i), p); + return jeaiii::to_text_from_integer(p, int8_t(i)); } char * itoa(UInt128 i, char * p) @@ -479,7 +439,7 @@ char * itoa(Int256 i, char * p) #define DEFAULT_ITOA(T) \ char * itoa(T i, char * p) \ { \ - return convert::itoa(i, p); \ + return jeaiii::to_text_from_integer(p, i); \ } #define FOR_MISSING_INTEGER_TYPES(M) \ From 225db5e253f578483bcbeeb8f3063c241382d49e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:04:52 +0100 Subject: [PATCH 025/439] Style --- base/base/itoa.cpp | 386 +++++++++++++++++++++++---------------------- 1 file changed, 194 insertions(+), 192 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 4587d3e3e82..868fdedb176 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -28,229 +28,231 @@ namespace jeaiii OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - using u32 = decltype(0xffffffff); - using u64 = decltype(0xffffffffffffffff); +struct pair +{ + char dd[2]; + constexpr pair(char c) : dd{c, '\0'} { } + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } +}; - struct pair - { - char dd[2]; - constexpr pair(char c) : dd{ c, '\0' } { } - constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } +constexpr struct +{ + pair dd[100]{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // }; + pair fd[100]{ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // + }; +} digits; - constexpr struct +constexpr UInt64 mask24 = (UInt64(1) << 24) - 1; +constexpr UInt64 mask32 = (UInt64(1) << 32) - 1; +constexpr UInt64 mask57 = (UInt64(1) << 57) - 1; + +template +struct _cond +{ + using type = F; +}; +template +struct _cond +{ + using type = T; +}; +template +using cond = typename _cond::type; + +template +inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) +{ + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); + + if (n < UInt32(1e2)) { - pair dd[100] - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; - pair fd[100] - { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; } - digits; - - constexpr u64 mask24 = (u64(1) << 24) - 1; - constexpr u64 mask32 = (u64(1) << 32) - 1; - constexpr u64 mask57 = (u64(1) << 57) - 1; - - template struct _cond { using type = F; }; - template struct _cond { using type = T; }; - template using cond = typename _cond::type; - - template - inline ALWAYS_INLINE - char* to_text_from_integer(char* b, T i) + if (n < UInt32(1e6)) { - constexpr auto q = sizeof(T); - using U = cond>>; - - // convert bool to int before test with unary + to silence warning if T happens to be bool - U const n = +i < 0 ? 
*b++ = '-', U(0) - U(i) : U(i); - - if (n < u32(1e2)) + if (n < UInt32(1e4)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? b + 1 : b + 2; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; } - if (n < u32(1e6)) + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < UInt64(1ull << 32ull)) + { + if (n < UInt32(1e8)) { - if (n < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - return b + 4; - } - auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e5); + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e7); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - return b + 6; - } - if (n < u64(1ull << 32ull)) - { - if (n < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; - } - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= n < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - return b + 10; - } - - // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings - u32 z = n % u32(1e8); - u64 u = n / u32(1e8); - - if (u < u32(1e2)) - { - // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else if (u < u32(1e6)) - { - if (u < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - else - { - auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e5); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - b += 6; - } - } - else if (u < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = 
digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } - else if (u < u64(1ull << 32ull)) + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings + UInt32 z = n % UInt32(1e8); + UInt64 u = n / UInt32(1e8); + + if (u < UInt32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < UInt32(1e6)) + { + if (u < UInt32(1e4)) { - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= u < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - b += 10; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } else { - u32 y = u % u32(1e8); - u /= u32(1e8); - - // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) - if (u < u32(1e2)) - { - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e5); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < UInt32(1e8)) + { + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < UInt64(1ull << 32ull)) + { + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < 
UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + UInt32 y = u % UInt32(1e8); + u /= UInt32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < UInt32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; } + // do 8 digits + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; +} } namespace { ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From de76be248b4f2b667fcd390874af5b296e91686f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:56:01 +0100 Subject: [PATCH 026/439] Revert incorrect change on my part --- base/base/itoa.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 868fdedb176..e7250764704 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -250,9 +250,24 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) namespace { -ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From 
c4fcc5946831da07bb148a1299bd2c7099c221a7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 Mar 2024 16:59:30 +0000 Subject: [PATCH 027/439] fix test --- tests/queries/0_stateless/02116_tuple_element.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index ece7114e763..e3a5134f2b2 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; From 891277e01e99f21f4087c947a4fb3d448473562e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 25 Mar 2024 14:57:43 +0000 Subject: [PATCH 028/439] fix clang-tidy --- src/Storages/IStorageCluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 36ded8d5412..d219eb32f45 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -37,7 +37,7 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override final { return true; } + bool isRemote() const final { return true; } bool supportsSubcolumns() const override { return true; } bool supportsOptimizationToSubcolumns() const override { return false; } From aa36b039c1fcec8881faee73083beb007d7b83a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 26 Mar 2024 15:02:25 +0000 Subject: [PATCH 029/439] fix test --- .../02971_functions_to_subcolumns_column_names.reference | 8 ++------ .../02971_functions_to_subcolumns_column_names.sql | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 4787c660c68..03c16267db1 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ 
b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -2,13 +2,9 @@ SELECT `arr.size0` AS `length(arr)`, `n.null` AS `isNull(n)` FROM t_column_names -┌─length(arr)─┬─isNull(n)─┐ -│ 3 │ 0 │ -└─────────────┴───────────┘ +{"length(arr)":"3","isNull(n)":0} SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` FROM default.t_column_names AS __table1 -┌─length(arr)─┬─isNull(n)─┐ -│ 3 │ 0 │ -└─────────────┴───────────┘ +{"length(arr)":"3","isNull(n)":0} diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index 89c39046df3..b867148c8ca 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -8,12 +8,12 @@ SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; DROP TABLE t_column_names; From 1f85889f889bfa35c8c7bc27fade1762b20906dd Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 29 Mar 2024 18:53:03 -0400 Subject: [PATCH 030/439] FuzzQuery table function --- src/Client/ClientBase.h | 2 +- src/{Client => Common}/QueryFuzzer.cpp | 50 +++-- src/{Client => Common}/QueryFuzzer.h | 1 + src/Storages/StorageFuzzQuery.cpp | 174 ++++++++++++++++++ src/Storages/StorageFuzzQuery.h | 89 +++++++++ src/Storages/registerStorages.cpp | 2 + src/TableFunctions/TableFunctionFuzzQuery.cpp | 54 ++++++ src/TableFunctions/TableFunctionFuzzQuery.h | 42 +++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + .../03031_table_function_fuzzquery.reference | 11 ++ .../03031_table_function_fuzzquery.sql | 18 ++ 12 files changed, 426 insertions(+), 19 deletions(-) rename src/{Client => Common}/QueryFuzzer.cpp (97%) rename src/{Client => Common}/QueryFuzzer.h (99%) create mode 100644 src/Storages/StorageFuzzQuery.cpp create mode 100644 src/Storages/StorageFuzzQuery.h create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.cpp create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.h create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.reference create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.sql diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index 7be01686258..137d545f82f 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % 
(sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1360,11 +1370,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (debug_output) + { + std::cout << std::endl; + + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + std::cout << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 99% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..8a83934b620 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,6 +38,7 @@ struct ASTWindowDefinition; struct 
QueryFuzzer { pcg64 fuzz_rand{randomSeed()}; + bool debug_output = true; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..c29986c7a7a --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + for (size_t row_num = 0; row_num < block_size; ++row_num) + { + ASTPtr new_query = query->clone(); + fuzzer.fuzzMain(new_query); + + WriteBufferFromOwnString out; + formatAST(*new_query, out, false); + auto data = out.str(); + size_t data_len = data.size(); + + IColumn::Offset next_offset = offset + data_len + 1; + data_to.resize(next_offset); + + std::copy(data.begin(), data.end(), &data_to[offset]); + + data_to[offset + data_len] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, 0); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; + +void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) +{ + validateNamedCollection( + collection, + std::unordered_set(), + std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); + + if (collection.has("query")) + configuration.query = collection.get("query"); + + if 
(collection.has("random_seed")) + configuration.random_seed = collection.get("random_seed"); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + { + StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + } + else + { + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + } + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..47142a81f16 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query = ""; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , 
config(config_) + , query(query_) + { + fuzzer.fuzz_rand = config_.random_seed; + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..f66d3ec3bfc 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -25,6 +25,7 @@ void registerStorageLiveView(StorageFactory & factory); void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -126,6 +127,7 @@ void registerStorages() registerStorageGenerateRandom(factory); registerStorageExecutable(factory); registerStorageWindowView(factory); + registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string 
getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 927457ff9f6..2952d0b7b70 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -25,6 +25,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 296af146faf..eef262490bf 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -22,6 +22,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..d598037127f --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,11 @@ +SELECT 1 +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL 
OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC +SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..5f5bb4b23e4 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 8956) LIMIT 10; From 77e28e29e9b2550204ea8d60647115587f36673e Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 30 Mar 2024 14:40:18 -0400 Subject: [PATCH 031/439] fix style --- src/Storages/StorageFuzzQuery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index c29986c7a7a..56b6a4de2a6 100644 --- 
a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -19,8 +19,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; - extern const int INCORRECT_DATA; } ColumnPtr FuzzQuerySource::createColumn() From 6e611f7e81fe9977dd88f81cc4c24f3e1620ae8d Mon Sep 17 00:00:00 2001 From: pufit Date: Mon, 8 Apr 2024 16:18:29 +0200 Subject: [PATCH 032/439] fix review --- .../table-functions/fuzzQuery.md | 35 +++++++++ src/Common/QueryFuzzer.cpp | 40 +++++----- src/Common/QueryFuzzer.h | 37 ++++++--- src/Storages/StorageFuzzQuery.cpp | 75 +++++++++---------- src/Storages/StorageFuzzQuery.h | 4 +- .../03031_table_function_fuzzquery.reference | 18 ++--- 6 files changed, 124 insertions(+), 85 deletions(-) create mode 100644 docs/en/sql-reference/table-functions/fuzzQuery.md diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..ff8cfd1cd3b --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,35 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, random_seed]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. 
│ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/src/Common/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp index 137d545f82f..6dd51033e3c 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -164,8 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,14 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -200,8 +200,8 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -211,15 +211,15 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -352,8 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -387,8 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1370,14 +1370,14 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - if (debug_output) + if (out_stream) { - std::cout << std::endl; + *out_stream << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); + WriteBufferFromOStream ast_buf(*out_stream, 4096); formatAST(*ast, ast_buf, false /*highlight*/); ast_buf.finalize(); - std::cout << std::endl << std::endl; + *out_stream << std::endl << std::endl; } } diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 8a83934b620..3cf0381e044 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,10 +35,32 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. 
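 *
 * A minimal usage sketch (assuming `ast` already holds a parsed query and the
 * caller wants fuzzing progress printed, as the interactive client does):
 *
 *     QueryFuzzer fuzzer(randomSeed(), &std::cout, &std::cerr);
 *     fuzzer.fuzzMain(ast);  // `ast` now points to a randomly mutated AST
 *
 * Passing nullptr for the output streams keeps the fuzzer silent, which is how
 * StorageFuzzQuery uses it.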
*/ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; - bool debug_output = true; +public: + + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -67,10 +89,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -78,9 +96,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -103,8 +118,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 56b6a4de2a6..e2b836c98b9 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -30,16 +30,29 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to.resize(block_size); IColumn::Offset offset = 0; - for (size_t row_num = 0; row_num < block_size; ++row_num) + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) { - ASTPtr new_query = query->clone(); + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); WriteBufferFromOwnString out; formatAST(*new_query, out, false); auto data = out.str(); size_t data_len = data.size(); + /// AST is too long, will start from the original query. 
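+        /// Each row is fuzzed from the previous result (`fuzz_base = new_query` below),
+        /// so the AST can keep growing; resetting to the original query bounds the row size.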
+ if (data_len > 500) + { + fuzz_base = query; + continue; + } + IColumn::Offset next_offset = offset + data_len + 1; data_to.resize(next_offset); @@ -49,6 +62,8 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to[row_num] = next_offset; offset = next_offset; + fuzz_base = new_query; + ++row_num; } return column; @@ -99,52 +114,30 @@ Pipe StorageFuzzQuery::read( return Pipe::unitePipes(std::move(pipes)); } -static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; - -void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection( - collection, - std::unordered_set(), - std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); - - if (collection.has("query")) - configuration.query = collection.get("query"); - - if (collection.has("random_seed")) - configuration.random_seed = collection.get("random_seed"); -} - StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageFuzzQuery::Configuration configuration{}; - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) { - StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } - else - { - // Supported signatures: - // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); - configuration.query = std::move(first_arg); - - if (engine_args.size() == 2) - { - const auto & literal = engine_args[1]->as(); - if (!literal.value.isNull()) - configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); - } - } return configuration; } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 47142a81f16..e948d0b2acf 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -35,8 +35,6 @@ public: size_t max_block_size, size_t num_streams) override; - static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); - static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); private: @@ -54,8 +52,8 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) + , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ nullptr) { - fuzzer.fuzz_rand = config_.random_seed; } String getName() 
const override { return "FuzzQuery"; } diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index d598037127f..c5b92291207 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,11 @@ SELECT 1 SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC -SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n 
count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 
2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC From 4e1f98ee7b78d48362a6788109658c8de859abd5 Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:53:28 +0200 Subject: [PATCH 033/439] removed cout,cerr from src --- src/Client/ClientBase.h | 2 +- src/Common/QueryFuzzer.h | 3 +-- src/Storages/StorageFuzzQuery.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index c0188253904..2b05878c176 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. - QueryFuzzer fuzzer; + QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 3cf0381e044..bf87bdfb24e 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,8 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index e948d0b2acf..40833190895 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -52,7 +52,7 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) - , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ nullptr) + , fuzzer(config_.random_seed) { } From 3bbf86d34506b74a92ca53e857d4c35b9406a08b Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:54:58 +0200 Subject: [PATCH 034/439] update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ee3ef1ae795..9d9bea11b82 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1581,6 +1581,7 @@ fuzzBits fuzzJSON fuzzer fuzzers +fuzzQuery gRPC gccMurmurHash gcem From 6329dc812462363ff6edf0388239f6bead2efacd Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 15:05:49 +0200 Subject: [PATCH 035/439] fix --- programs/client/Client.h | 5 ++++- src/Client/ClientBase.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/client/Client.h b/programs/client/Client.h index 11d9dec97b1..122b8e5ab3f 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void 
initialize(Poco::Util::Application & self) override; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 2b05878c176..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. - QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; + QueryFuzzer fuzzer; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; From 76415ba3523921de138feb39fca7fbc65b8f6dc5 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 10 Apr 2024 15:27:53 +0200 Subject: [PATCH 036/439] add `explicit`, more stable tests --- src/Common/QueryFuzzer.h | 2 +- .../03031_table_function_fuzzquery.reference | 13 ++----------- .../0_stateless/03031_table_function_fuzzquery.sql | 4 ++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index bf87bdfb24e..35d088809f2 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,7 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index c5b92291207..202e4557a33 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,2 @@ -SELECT 1 -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n 
WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5f5bb4b23e4..5821e2e5111 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10; +', 8956) LIMIT 10 FORMAT NULL; From 4df3cf3b151f407c4daaa7f8df80a6fddb836280 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 11 Apr 2024 16:57:45 +0200 Subject: [PATCH 037/439] Update StorageFuzzQuery.h --- src/Storages/StorageFuzzQuery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 40833190895..3ae506fdfb8 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -17,7 +17,7 @@ class StorageFuzzQuery final : public IStorage public: struct Configuration : public StatelessTableEngineConfiguration { - String query = ""; + String query; UInt64 random_seed = randomSeed(); }; From 562d76ccab6ffc53c9059139fead3b2e41ff4725 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 12 Apr 2024 
11:19:44 +0200 Subject: [PATCH 038/439] Update StorageFuzzQuery.cpp --- src/Storages/StorageFuzzQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index e2b836c98b9..5e29a04427b 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -105,7 +105,7 @@ Pipe StorageFuzzQuery::read( const char * begin = config.query.data(); const char * end = begin + config.query.size(); - ParserQuery parser(end, 0); + ParserQuery parser(end, false); auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (UInt64 i = 0; i < num_streams; ++i) From 517191218fbc8c780fc218ac059b9095d1670752 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 18 Apr 2024 22:33:09 +0000 Subject: [PATCH 039/439] Add compact mode in Variant discriminators serialization --- src/DataTypes/DataTypeVariant.h | 1 + src/DataTypes/Serializations/ISerialization.h | 2 + .../Serializations/SerializationVariant.cpp | 323 +++++++++++++++--- .../Serializations/SerializationVariant.h | 65 ++++ .../SerializationVariantElement.cpp | 135 ++++++-- .../SerializationVariantElement.h | 9 + .../MergeTreeDataPartWriterCompact.cpp | 6 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 5 +- .../MergeTree/MergeTreeReaderWide.cpp | 23 +- src/Storages/MergeTree/MergeTreeReaderWide.h | 5 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + 11 files changed, 483 insertions(+), 92 deletions(-) diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h index dadc85ac3b3..ab471d37b2f 100644 --- a/src/DataTypes/DataTypeVariant.h +++ b/src/DataTypes/DataTypeVariant.h @@ -42,6 +42,7 @@ public: bool equals(const IDataType & rhs) const override; bool isParametric() const override { return true; } + bool isComparable() const override { return true; } bool haveSubtypes() const override { return true; } bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ebaa26d19a6..934a92ecfba 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -240,6 +240,8 @@ public: bool low_cardinality_use_single_dictionary_for_part = true; bool position_independent_encoding = true; + + bool use_compact_variant_discriminators_serialization = false; }; struct DeserializeBinaryBulkSettings diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8ca86c63bf6..0490ed62c3b 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -70,14 +70,25 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } -struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState +struct SerializationVariant::SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { + SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; std::vector states; }; -struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariant::DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { + 
DeserializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; std::vector states; + SerializationVariant::DiscriminatorsDeserializationState discriminators_state; }; void SerializationVariant::serializeBinaryBulkStatePrefix( @@ -85,9 +96,18 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - const ColumnVariant & col = assert_cast(column); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); - auto variant_state = std::make_shared(); + if (!discriminators_stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix"); + + UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC; + writeBinaryLittleEndian(mode, *discriminators_stream); + + const ColumnVariant & col = assert_cast(column); + auto variant_state = std::make_shared(mode); variant_state->states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); @@ -125,7 +145,16 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const { - auto variant_state = std::make_shared(); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!discriminators_stream) + return; + + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + auto variant_state = std::make_shared(mode); variant_state->states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); @@ -161,13 +190,66 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( auto * variant_state = checkAndGetState(state); - /// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate + /// Write number of rows in this granule in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeVarUInt(UInt64(limit), *discriminators_stream); + + /// If column has only one none empty discriminators and no NULLs we don't need to + /// calculate limits for variants and use provided offset/limit. + if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); + + /// In compact mode write the format of the granule and single non-empty discriminator. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + /// For basic mode just serialize this discriminator limit times. 
+ else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + + settings.path.push_back(Substream::VariantElements); + addVariantElementToPath(settings.path, non_empty_global_discr); + /// We can use the same offset/limit as for whole Variant column + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); + settings.path.pop_back(); + settings.path.pop_back(); + return; + } + /// If column has only NULLs, just serialize NULL discriminators. + else if (col.hasOnlyNulls()) + { + /// In compact mode write single NULL_DISCRIMINATOR. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// In basic mode write NULL_DISCRIMINATOR limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + return; + } + + /// If offset = 0 and limit == col.size() we don't need to calculate /// offsets and limits for variants and need to just serialize whole columns. - if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls()) + if ((offset == 0 && limit == col.size())) { /// First, serialize discriminators. - /// If we have only NULLs or local and global discriminators are the same, just serialize the column as is. - if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder()) + /// Here we are sure that column contains different discriminators, use plain granule format in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + + /// If local and global discriminators are the same, just serialize the column as is. + if (col.hasGlobalVariantsOrder()) { SerializationNumber().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit); } @@ -191,35 +273,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( return; } - /// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant. - if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// First, serialize discriminators. - /// We know that all discriminators are the same, so we just need to serialize this discriminator limit times. - auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); - for (size_t i = 0; i != limit; ++i) - writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); - - /// Second, serialize non-empty variant (other variants are empty and we can skip their serialization). 
- settings.path.push_back(Substream::VariantElements); - addVariantElementToPath(settings.path, non_empty_global_discr); - /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); - settings.path.pop_back(); - settings.path.pop_back(); - return; - } - /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); size_t end = offset + limit; + size_t num_non_empty_variants_in_range = 0; + ColumnVariant::Discriminator last_non_empty_variant_discr = 0; for (size_t i = offset; i < end; ++i) { auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]); - writeBinaryLittleEndian(global_discr, *discriminators_stream); - if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) { /// If we see this discriminator for the first time, update offset @@ -227,9 +290,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( variant_offsets_and_limits[global_discr].first = offsets[i]; /// Update limit for this discriminator. ++variant_offsets_and_limits[global_discr].second; + ++num_non_empty_variants_in_range; + last_non_empty_variant_discr = global_discr; } } + /// In basic mode just serialize discriminators as is row by row. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + { + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// In compact mode check if we have the same discriminator for all rows in this granule. + /// First, check if all values in granule are NULLs. + else if (num_non_empty_variants_in_range == 0) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// Then, check if there is only 1 variant and no NULLs in this granule. + else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream); + } + /// Otherwise there are different discriminators in this granule. + else + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) @@ -267,34 +359,62 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// First, deserialize discriminators. 
settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariant * variant_state = nullptr; + std::vector variant_limits; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_state = checkAndGetState(state); col.getLocalDiscriminatorsPtr() = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_state = checkAndGetState(state); + + /// Deserialize discriminators according to serialization mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, variant_state->discriminators_state); - SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } + /// It may happen that there is no such stream, in this case just do nothing. + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Second, calculate limits for each variant by iterating through new discriminators. - std::vector variant_limits(variants.size(), 0); - auto & discriminators_data = col.getLocalDiscriminators(); - size_t discriminators_offset = discriminators_data.size() - limit; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + /// Second, calculate limits for each variant by iterating through new discriminators + /// if we didn't do it during discriminators deserialization. + if (variant_limits.empty()) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - ++variant_limits[discr]; + variant_limits.resize(variants.size(), 0); + auto & discriminators_data = col.getLocalDiscriminators(); + + /// We can actually read less than limit discriminators and we cannot determine the actual number of read rows + /// by discriminators column as it could be taken from the substreams cache. And we need actual number of read + /// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache + /// or not by comparing it with discriminators column size (they should be the same when offsets are in cache). + /// If offsets are not in the cache, we can use it's size to determine the actual number of read rows. + size_t num_new_discriminators = limit; + size_t offsets_size = col.getOffsetsPtr()->size(); + if (discriminators_data.size() > offsets_size) + num_new_discriminators = discriminators_data.size() - offsets_size; + size_t discriminators_offset = discriminators_data.size() - num_new_discriminators; + + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } } /// Now we can deserialize variants according to their limits. 
- auto * variant_state = checkAndGetState(state); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) { @@ -319,20 +439,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } else { - auto & offsets = col.getOffsets(); - offsets.reserve(offsets.size() + limit); std::vector variant_offsets; variant_offsets.reserve(variants.size()); + size_t num_non_empty_variants = 0; + ColumnVariant::Discriminator last_non_empty_discr = 0; for (size_t i = 0; i != variants.size(); ++i) - variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); - - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr == ColumnVariant::NULL_DISCRIMINATOR) - offsets.emplace_back(); - else - offsets.push_back(variant_offsets[discr]++); + if (variant_limits[i]) + { + ++num_non_empty_variants; + last_non_empty_discr = i; + } + + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); + } + + auto & discriminators_data = col.getLocalDiscriminators(); + auto & offsets = col.getOffsets(); + size_t num_new_offsets = discriminators_data.size() - offsets.size(); + offsets.reserve(offsets.size() + num_new_offsets); + /// If there are only NULLs were read, fill offsets with 0. + if (num_non_empty_variants == 0) + { + offsets.resize_fill(discriminators_data.size(), 0); + } + /// If there is only 1 variant and no NULLs was read, fill offsets with sequential offsets of this variant. + else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets) + { + size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets; + for (size_t i = 0; i != num_new_offsets; ++i) + offsets.push_back(first_offset + i); + } + /// Otherwise iterate through discriminators and fill offsets accordingly. + else + { + size_t start = offsets.size(); + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + offsets.emplace_back(); + else + offsets.push_back(variant_offsets[discr]++); + } } addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr()); @@ -340,6 +489,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } +std::vector SerializationVariant::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DiscriminatorsDeserializationState & state) const +{ + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.remaining_rows_in_granule = 0; + + /// Calculate limits for variants during discriminators deserialization. + std::vector variant_limits(variants.size(), 0); + while (limit) + { + /// If we read all rows from current granule, start reading the next one. 
+ if (state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limits; + + readDiscriminatorsGranuleStart(state, stream); + } + + size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule); + if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.compact_discr); + if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR) + variant_limits[state.compact_discr] += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } + } + + state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limits; +} + +void SerializationVariant::readDiscriminatorsGranuleStart(DB::SerializationVariant::DiscriminatorsDeserializationState & state, DB::ReadBuffer * stream) +{ + UInt64 granule_size; + readVarUInt(granule_size, *stream); + state.remaining_rows_in_granule = granule_size; + UInt8 granule_format; + readBinaryLittleEndian(granule_format, *stream); + state.granule_format = static_cast(granule_format); + if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + readBinaryLittleEndian(state.compact_discr, *stream); +} + void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const { path.push_back(Substream::VariantElement); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 3f53dcf1339..e6b067dc68a 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -6,6 +6,13 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + /// Class for serializing/deserializing column with Variant type. /// It supports both text and binary bulk serializations/deserializations. /// @@ -18,6 +25,17 @@ namespace DB /// /// During binary bulk serialization it transforms local discriminators /// to global and serializes them into a separate stream VariantDiscriminators. +/// There are 2 modes of serialising discriminators: +/// Basic mode, when all discriminators are serialized as is row by row. +/// Compact mode, when we avoid writing the same discriminators in granules when there is +/// only one variant (or only NULLs) in the granule. +/// In compact mode we serialize granules in the following format: +/// +/// There are 2 different formats of granule - plain and compact. +/// Plain format is used when there are different discriminators in this granule, +/// in this format all discriminators are serialized as is row by row. +/// Compact format is used when all discriminators are the same in this granule, +/// in this case only this single discriminator is serialized. /// Each variant is serialized into a separate stream with path VariantElements/VariantElement /// (VariantElements stream is needed for correct sub-columns creation). 
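The read side of the compact encoding described in the header comment above can be pictured as a small buffer-based decoder. GranuleHeader and readGranuleHeader are hypothetical names; the real code goes through ReadBuffer, readVarUInt and readBinaryLittleEndian, so this is only a sketch of the layout:

    #include <cstdint>
    #include <stdexcept>
    #include <string>

    struct GranuleHeader
    {
        uint64_t rows = 0;           // number of rows covered by this granule
        uint8_t format = 0;          // 0 = plain, 1 = compact
        uint8_t compact_discr = 0;   // meaningful only for the compact format
    };

    GranuleHeader readGranuleHeader(const std::string & buf, size_t & pos)
    {
        GranuleHeader h;
        for (int shift = 0;; shift += 7)  // LEB128-style varint for the row count
        {
            uint8_t byte = uint8_t(buf.at(pos++));
            h.rows |= uint64_t(byte & 0x7F) << shift;
            if (!(byte & 0x80))
                break;
        }
        h.format = uint8_t(buf.at(pos++));
        if (h.format > 1)
            throw std::runtime_error("unknown discriminators granule format");
        if (h.format == 1)
            h.compact_discr = uint8_t(buf.at(pos++));
        return h;
    }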
We store and serialize /// variants in a sparse form (the size of a variant column equals to the number of its discriminator @@ -32,6 +50,25 @@ namespace DB class SerializationVariant : public ISerialization { public: + struct DiscriminatorsSerializationMode + { + enum Value + { + BASIC = 0, /// Store the whole discriminators column. + COMPACT = 1, /// Don't write discriminators in granule if all of them are the same. + }; + + static void checkMode(UInt64 mode) + { + if (mode > Value::COMPACT) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column."); + } + + explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast(mode)) { checkMode(mode); } + + Value value; + }; + using VariantSerializations = std::vector; explicit SerializationVariant( @@ -114,8 +151,36 @@ public: static std::vector getVariantsDeserializeTextOrder(const DataTypes & variant_types); private: + friend SerializationVariantElement; + void addVariantElementToPath(SubstreamPath & path, size_t i) const; + enum CompactDiscriminatorsGranuleFormat + { + PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row. + COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. + }; + + /// State of currently deserialized granule. + struct DiscriminatorsDeserializationState + { + CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; + size_t remaining_rows_in_granule = 0; + ColumnVariant::Discriminator compact_discr = 0; + }; + + struct SerializeBinaryBulkStateVariant; + struct DeserializeBinaryBulkStateVariant; + + std::vector deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DiscriminatorsDeserializationState & state) const; + + static void readDiscriminatorsGranuleStart(DiscriminatorsDeserializationState & state, ReadBuffer * stream); + bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 7d4487fe6da..27166dd0704 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -1,7 +1,9 @@ #include #include +#include #include #include +#include namespace DB { @@ -39,8 +41,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); } -struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { + DeserializeBinaryBulkStateVariantElement(UInt64 discriminators_version_) : discriminators_version(discriminators_version_) + { + } + /// During deserialization discriminators and variant streams can be shared. 
/// For example we can read several variant elements together: "select v.UInt32, v.String from table", /// or we can read the whole variant and some of variant elements: "select v, v.UInt32 from table". @@ -51,13 +57,24 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria /// substream cache correctly. ColumnPtr discriminators; ColumnPtr variant; - + SerializationVariant::DiscriminatorsSerializationMode discriminators_version; + SerializationVariant::DiscriminatorsDeserializationState discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const { - auto variant_element_state = std::make_shared(); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!discriminators_stream) + return; + + UInt64 version; + readBinaryLittleEndian(version, *discriminators_stream); + + auto variant_element_state = std::make_shared(version); addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state); @@ -78,35 +95,53 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * variant_element_state = checkAndGetState(state); - /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr; + std::optional variant_limit; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_element_state = checkAndGetState(state); variant_element_state->discriminators = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_element_state = checkAndGetState(state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); - SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + /// Deserialize discriminators according to serialization mode. + if (variant_element_state->discriminators_version.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = deserializeCompactDiscriminators(variant_element_state->discriminators, limit, discriminators_stream, settings.continuous_reading, *variant_element_state); + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. + /// We could read less than limit discriminators, but we will need actual number of read rows later. 
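The bookkeeping that follows hinges on one number: how many of the newly read rows belong to this particular variant element. As a sketch on plain vectors (countRowsOfVariant is a hypothetical helper, not part of the patch):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    /// New rows for this subcolumn = newly read discriminators that match the
    /// element's own discriminator; the number of new rows is recovered from the
    /// size difference with the already filled result column, because the
    /// discriminators column may come from the substreams cache.
    size_t countRowsOfVariant(
        const std::vector<uint8_t> & discriminators,
        size_t result_column_size,
        uint8_t variant_discriminator)
    {
        size_t num_new = discriminators.size() - result_column_size;
        size_t start = discriminators.size() - num_new;
        return size_t(std::count(discriminators.begin() + start, discriminators.end(), variant_discriminator));
    }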
+ size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size(); + + /// Iterate through new discriminators to calculate the limit for our variant + /// if we didn't do it during discriminators deserialization. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); - size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators; + if (!variant_limit) + { + variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + *variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. @@ -117,19 +152,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( auto & nullable_column = assert_cast(*mutable_column); NullMap & null_map = nullable_column.getNullMapData(); /// If we have only our discriminator in range, fill null map with 0. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - null_map.resize_fill(null_map.size() + limit, 0); + null_map.resize_fill(null_map.size() + num_new_discriminators, 0); } /// If no our discriminator in current range, fill null map with 1. else if (variant_limit == 0) { - null_map.resize_fill(null_map.size() + limit, 1); + null_map.resize_fill(null_map.size() + num_new_discriminators, 1); } /// Otherwise we should iterate through discriminators to fill null map. else { - null_map.reserve(null_map.size() + limit); + null_map.reserve(null_map.size() + num_new_discriminators); for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) null_map.push_back(discriminators_data[i] != variant_discriminator); } @@ -151,12 +186,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// If nothing to deserialize, just insert defaults. if (variant_limit == 0) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache); + nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); /// If nothing was deserialized when variant_limit > 0 @@ -165,16 +200,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// In this case we should just insert default values. if (variant_element_state->variant->empty()) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } - size_t variant_offset = variant_element_state->variant->size() - variant_limit; + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. 
- if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit); + mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit); } /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator. else @@ -189,6 +224,56 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } } +size_t SerializationVariantElement::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + DB::ReadBuffer * stream, + bool continuous_reading, + DB::SerializationVariantElement::DeserializeBinaryBulkStateVariantElement & state) const +{ + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.discriminators_state.remaining_rows_in_granule = 0; + + /// Calculate our variant limit during discriminators deserialization. + size_t variant_limit = 0; + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (state.discriminators_state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limit; + + SerializationVariant::readDiscriminatorsGranuleStart(state.discriminators_state, stream); + } + + size_t limit_in_granule = std::min(limit, state.discriminators_state.remaining_rows_in_granule); + if (state.discriminators_state.granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.discriminators_state.compact_discr); + if (state.discriminators_state.compact_discr == variant_discriminator) + variant_limit += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } + + state.discriminators_state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limit; +} + void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const { path.push_back(Substream::VariantElements); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index aafecf43d39..e86d7ecefbe 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -62,6 +62,15 @@ public: private: friend SerializationVariant; + struct DeserializeBinaryBulkStateVariantElement; + + size_t deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantElement & state) const; + void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..b25b4042b5f 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -130,7 +130,8 @@ void writeColumnSingleGranule( const SerializationPtr & serialization, ISerialization::OutputStreamGetter stream_getter, size_t from_row, - size_t number_of_rows) + size_t number_of_rows, + const MergeTreeSettingsPtr & settings) { ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkSettings serialize_settings; @@ -138,6 +139,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.use_compact_variant_discriminators_serialization = settings->use_compact_variant_discriminators_serialization; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); @@ -231,7 +233,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeColumnSingleGranule( block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), - stream_getter, granule.start_row, granule.rows_to_write); + stream_getter, granule.start_row, granule.rows_to_write, storage.getSettings()); /// Each type always have at least one substream prev_stream->hashing_buf.next(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 6a3b08d4d65..89abb1f242f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -399,19 +399,21 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); auto serialization = data_part->getSerialization(name_and_type.name); + const auto & global_settings = storage.getContext()->getSettingsRef(); if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.use_compact_variant_discriminators_serialization = storage.getSettings()->use_compact_variant_discriminators_serialization; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } - const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.use_compact_variant_discriminators_serialization = storage.getSettings()->use_compact_variant_discriminators_serialization; for (const auto & granule : granules) { @@ -600,6 +602,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + 
serialize_settings.use_compact_variant_discriminators_serialization = storage.getSettings()->use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 394a22835f1..1fd886a6216 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -128,6 +128,9 @@ size_t MergeTreeReaderWide::readRows( size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); + if (deserialize_binary_bulk_state_map.empty()) + readPrefixes(num_columns, current_task_last_mark); + if (num_columns == 0) return max_rows_to_read; @@ -290,19 +293,28 @@ static ReadBuffer * getStream( return stream.getDataBuffer(); } +void MergeTreeReaderWide::readPrefixes(size_t num_columns, size_t current_task_last_mark) +{ + for (size_t pos = 0; pos < num_columns; ++pos) + { + const auto & column_to_read = columns_to_read[pos]; + deserializePrefix(serializations[pos], column_to_read, current_task_last_mark); + } +} + void MergeTreeReaderWide::deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, - size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + size_t current_task_last_mark) { const auto & name = name_and_type.name; if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + ISerialization::SubstreamsCache empty_cache; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, empty_cache); }; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); } @@ -317,8 +329,6 @@ void MergeTreeReaderWide::prefetchForColumn( size_t current_task_last_mark, ISerialization::SubstreamsCache & cache) { - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); @@ -344,9 +354,6 @@ void MergeTreeReaderWide::readData( double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; - - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); - deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { bool seek_to_mark = !was_prefetched && !continue_reading; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..617e26864a9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -64,11 +64,12 @@ private: size_t current_task_last_mark, ISerialization::SubstreamsCache & cache); + void readPrefixes(size_t num_columns, size_t current_task_last_mark); + void 
deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, - size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + size_t current_task_last_mark); std::unordered_map caches; std::unordered_set prefetched_streams; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a00508fd1c1..da8fa4875dd 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ From f70851f5f492b9a372d0715e90fa7753383038f3 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 18 Apr 2024 22:39:26 +0000 Subject: [PATCH 040/439] Add new MergeTree setting in docs --- docs/en/operations/settings/merge-tree-settings.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 76250b80476..ff21d792aee 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -885,3 +885,8 @@ Default value: false **See Also** - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting + +## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} + +Enables compact mode for binary serialization of discriminators in Variant data type. +This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. From e524f3817254488c2e43b3aa9ea350e236910493 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 18 Apr 2024 22:40:30 +0000 Subject: [PATCH 041/439] Add default value of new setting in docs --- docs/en/operations/settings/merge-tree-settings.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index ff21d792aee..0f54adb5c7b 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -890,3 +890,5 @@ Default value: false Enables compact mode for binary serialization of discriminators in Variant data type. This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. 
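A rough estimate of the effect of this setting, assuming the default index_granularity of 8192 rows and one-byte discriminators: in basic mode every granule stores one discriminator per row, about 8192 bytes before compression, while a compact granule that holds a single variant or only NULLs shrinks to the varint row count (2 bytes for 8192), one format byte and one discriminator byte, roughly 4 bytes per granule.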
+ +Default value: true From 7b6a8ed28e4f985fc1866d316a59bc205fd919ca Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 19 Apr 2024 16:17:25 +0000 Subject: [PATCH 042/439] Introduce deserialization states cache --- .../Serializations/ISerialization.cpp | 17 +++++ src/DataTypes/Serializations/ISerialization.h | 8 ++- .../Serializations/SerializationArray.cpp | 5 +- .../Serializations/SerializationArray.h | 3 +- .../Serializations/SerializationInterval.cpp | 4 +- .../Serializations/SerializationInterval.h | 2 +- .../SerializationLowCardinality.cpp | 3 +- .../SerializationLowCardinality.h | 3 +- .../Serializations/SerializationMap.cpp | 5 +- .../Serializations/SerializationMap.h | 3 +- .../Serializations/SerializationNamed.cpp | 5 +- .../Serializations/SerializationNamed.h | 3 +- .../Serializations/SerializationNullable.cpp | 5 +- .../Serializations/SerializationNullable.h | 3 +- .../Serializations/SerializationObject.cpp | 5 +- .../Serializations/SerializationObject.h | 3 +- .../Serializations/SerializationSparse.cpp | 5 +- .../Serializations/SerializationSparse.h | 3 +- .../Serializations/SerializationTuple.cpp | 5 +- .../Serializations/SerializationTuple.h | 3 +- .../Serializations/SerializationVariant.cpp | 72 +++++++++++-------- .../Serializations/SerializationVariant.h | 20 +++--- .../SerializationVariantElement.cpp | 64 ++++++++++------- .../SerializationVariantElement.h | 5 +- .../Serializations/SerializationWrapper.cpp | 5 +- .../Serializations/SerializationWrapper.h | 3 +- .../tests/gtest_object_serialization.cpp | 2 +- src/Formats/NativeReader.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- .../MergeTree/MergeTreeReaderCompact.cpp | 4 +- .../MergeTree/MergeTreeReaderWide.cpp | 38 +++++----- src/Storages/MergeTree/MergeTreeReaderWide.h | 12 ++-- src/Storages/StorageLog.cpp | 2 +- 33 files changed, 195 insertions(+), 129 deletions(-) diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..5c8a0d49038 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -271,6 +271,23 @@ ColumnPtr ISerialization::getFromSubstreamsCache(SubstreamsCache * cache, const return it == cache->end() ? nullptr : it->second; } +void ISerialization::addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state) +{ + if (!cache || path.empty()) + return; + + cache->emplace(getSubcolumnNameForStream(path), state); +} + +ISerialization::DeserializeBinaryBulkStatePtr ISerialization::getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path) +{ + if (!cache || path.empty()) + return nullptr; + + auto it = cache->find(getSubcolumnNameForStream(path)); + return it == cache->end() ? 
nullptr : it->second; +} + bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) { for (const auto & elem : path) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 934a92ecfba..408b5fe2133 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -231,6 +231,9 @@ public: using SerializeBinaryBulkStatePtr = std::shared_ptr; using DeserializeBinaryBulkStatePtr = std::shared_ptr; + using SubstreamsDeserializeStatesCache = std::unordered_map; + + struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -275,7 +278,8 @@ public: /// Call before before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr. virtual void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & /*settings*/, - DeserializeBinaryBulkStatePtr & /*state*/) const {} + DeserializeBinaryBulkStatePtr & /*state*/, + SubstreamsDeserializeStatesCache * /*cache*/) const {} /** 'offset' and 'limit' are used to specify range. * limit = 0 - means no limit. @@ -394,6 +398,8 @@ public: static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); + static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state); + static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path); static bool isSpecialCompressionAllowed(const SubstreamPath & path); diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index e8aab615849..d6546b338b5 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -284,10 +284,11 @@ void SerializationArray::serializeBinaryBulkStateSuffix( void SerializationArray::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::ArrayElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 82f5e8bce45..429e8fa64cc 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -55,7 +55,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationInterval.cpp b/src/DataTypes/Serializations/SerializationInterval.cpp index 59086d8aef3..2157566895d 100644 --- a/src/DataTypes/Serializations/SerializationInterval.cpp +++ b/src/DataTypes/Serializations/SerializationInterval.cpp @@ -68,9 +68,9 @@ void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & } void 
SerializationInterval::deserializeBinaryBulkStatePrefix( - DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state); + dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationInterval.h b/src/DataTypes/Serializations/SerializationInterval.h index a4e6c204e4f..05dfdb00afc 100644 --- a/src/DataTypes/Serializations/SerializationInterval.h +++ b/src/DataTypes/Serializations/SerializationInterval.h @@ -34,7 +34,7 @@ public: void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override; + void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const override; void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 9efe05042ed..802da263d89 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -267,7 +267,8 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * /*cache*/) const { settings.path.push_back(Substream::DictionaryKeys); auto * stream = settings.getter(settings.path); diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index d2c3a95c702..aa64e956a64 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -33,7 +33,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 7b6f87baf2e..dac4fbe88e0 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -420,9 +420,10 @@ void SerializationMap::serializeBinaryBulkStateSuffix( void SerializationMap::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - 
nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 3e27ef1b04a..cfcde445c1f 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -51,7 +51,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNamed.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp index 2792827e690..07f5f9ea7ed 100644 --- a/src/DataTypes/Serializations/SerializationNamed.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -54,10 +54,11 @@ void SerializationNamed::serializeBinaryBulkStateSuffix( void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { addToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 0633ba2ea6f..bb2161e40e6 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..477349f955d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -95,10 +95,11 @@ void SerializationNullable::serializeBinaryBulkStateSuffix( void SerializationNullable::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::NullableElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 37858ccdefd..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -29,7 +29,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, 
diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 67bf7af7799..88244a89204 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -210,7 +210,8 @@ void SerializationObject::serializeBinaryBulkStateSuffix( template void SerializationObject::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { checkSerializationIsSupported(settings); if (state) @@ -258,7 +259,7 @@ void SerializationObject::deserializeBinaryBulkStatePrefix( } settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state); + state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); settings.path.pop_back(); state = std::move(state_object); diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 39e1c514640..bdc5a9d7e2f 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -41,7 +41,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * /*cache*/) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 4d7514271ad..b125df564b3 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -242,12 +242,13 @@ void SerializationSparse::serializeBinaryBulkStateSuffix( void SerializationSparse::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto state_sparse = std::make_shared(); settings.path.push_back(Substream::SparseElements); - nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested); + nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested, cache); settings.path.pop_back(); state = std::move(state_sparse); diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index b1ed7b613f0..a55856bacf0 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; /// Allows to write ColumnSparse and other columns in sparse serialization. 
void serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 632a019d2d9..bb7c19aa78d 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -606,13 +606,14 @@ void SerializationTuple::serializeBinaryBulkStateSuffix( void SerializationTuple::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto tuple_state = std::make_shared(); tuple_state->states.resize(elems.size()); for (size_t i = 0; i < elems.size(); ++i) - elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); + elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i], cache); state = std::move(tuple_state); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index d9c63a05217..810673d8b21 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -53,7 +53,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 0490ed62c3b..81c8a1f9ffa 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -70,25 +70,20 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } -struct SerializationVariant::SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState +struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) { } SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; - std::vector states; + std::vector variant_states; }; -struct SerializationVariant::DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState +struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { - DeserializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) - { - } - - SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; - std::vector states; - SerializationVariant::DiscriminatorsDeserializationState discriminators_state; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; + std::vector variant_states; }; void SerializationVariant::serializeBinaryBulkStatePrefix( @@ -108,14 +103,14 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( const ColumnVariant & col = assert_cast(column); auto variant_state = std::make_shared(mode); - variant_state->states.resize(variants.size()); + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]); + 
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -134,7 +129,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -143,25 +138,39 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::VariantDiscriminators); - auto * discriminators_stream = settings.getter(settings.path); + + DeserializeBinaryBulkStatePtr discriminators_state; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - if (!discriminators_stream) - return; - - UInt64 mode; - readBinaryLittleEndian(mode, *discriminators_stream); - auto variant_state = std::make_shared(mode); - variant_state->states.resize(variants.size()); + auto variant_state = std::make_shared(); + variant_state->discriminators_state = discriminators_state; + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i]); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } @@ -216,7 +225,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( settings.path.push_back(Substream::VariantElements); addVariantElementToPath(settings.path, non_empty_global_discr); /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); settings.path.pop_back(); settings.path.pop_back(); return; @@ -266,7 +275,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -335,7 +344,7 @@ void 
SerializationVariant::serializeBinaryBulkWithMultipleStreams( variant_offsets_and_limits[i].first, variant_offsets_and_limits[i].second, settings, - variant_state->states[i]); + variant_state->variant_states[i]); settings.path.pop_back(); } } @@ -370,12 +379,13 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( else if (auto * discriminators_stream = settings.getter(settings.path)) { variant_state = checkAndGetState(state); + auto discriminators_state = checkAndGetState(variant_state->discriminators_state); /// Deserialize discriminators according to serialization mode. - if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); else - variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, variant_state->discriminators_state); + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } @@ -419,7 +429,7 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } settings.path.pop_back(); @@ -494,7 +504,7 @@ std::vector SerializationVariant::deserializeCompactDiscriminators( size_t limit, ReadBuffer * stream, bool continuous_reading, - DiscriminatorsDeserializationState & state) const + DeserializeBinaryBulkStateVariantDiscriminators & state) const { auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); auto & discriminators_data = discriminators.getData(); @@ -543,7 +553,7 @@ std::vector SerializationVariant::deserializeCompactDiscriminators( return variant_limits; } -void SerializationVariant::readDiscriminatorsGranuleStart(DB::SerializationVariant::DiscriminatorsDeserializationState & state, DB::ReadBuffer * stream) +void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream) { UInt64 granule_size; readVarUInt(granule_size, *stream); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index e6b067dc68a..a2f077964e6 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -96,7 +96,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -161,25 +162,28 @@ private: COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. }; - /// State of currently deserialized granule. 
- struct DiscriminatorsDeserializationState + struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState { + DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + { + } + + DiscriminatorsSerializationMode mode; + + /// Deserialize state of currently read granule in compact mode. CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; size_t remaining_rows_in_granule = 0; ColumnVariant::Discriminator compact_discr = 0; }; - struct SerializeBinaryBulkStateVariant; - struct DeserializeBinaryBulkStateVariant; - std::vector deserializeCompactDiscriminators( ColumnPtr & discriminators_column, size_t limit, ReadBuffer * stream, bool continuous_reading, - DiscriminatorsDeserializationState & state) const; + DeserializeBinaryBulkStateVariantDiscriminators & state) const; - static void readDiscriminatorsGranuleStart(DiscriminatorsDeserializationState & state, ReadBuffer * stream); + static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream); bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 27166dd0704..19ac268268e 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -43,10 +43,6 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { - DeserializeBinaryBulkStateVariantElement(UInt64 discriminators_version_) : discriminators_version(discriminators_version_) - { - } - /// During deserialization discriminators and variant streams can be shared. /// For example we can read several variant elements together: "select v.UInt32, v.String from table", /// or we can read the whole variant and some of variant elements: "select v, v.UInt32 from table". @@ -57,27 +53,39 @@ struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : p /// substream cache correctly. 
ColumnPtr discriminators; ColumnPtr variant; - SerializationVariant::DiscriminatorsSerializationMode discriminators_version; - SerializationVariant::DiscriminatorsDeserializationState discriminators_state; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; -void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const +void SerializationVariantElement::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::VariantDiscriminators); - auto * discriminators_stream = settings.getter(settings.path); + + DeserializeBinaryBulkStatePtr discriminators_state; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - if (!discriminators_stream) - return; - - UInt64 version; - readBinaryLittleEndian(version, *discriminators_stream); - - auto variant_element_state = std::make_shared(version); + auto variant_element_state = std::make_shared(); + variant_element_state->discriminators_state = discriminators_state; addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); state = std::move(variant_element_state); @@ -108,13 +116,14 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( else if (auto * discriminators_stream = settings.getter(settings.path)) { variant_element_state = checkAndGetState(state); + auto discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); /// Deserialize discriminators according to serialization mode. 
- if (variant_element_state->discriminators_version.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); else variant_limit = deserializeCompactDiscriminators(variant_element_state->discriminators, limit, discriminators_stream, settings.continuous_reading, *variant_element_state); @@ -229,34 +238,35 @@ size_t SerializationVariantElement::deserializeCompactDiscriminators( size_t limit, DB::ReadBuffer * stream, bool continuous_reading, - DB::SerializationVariantElement::DeserializeBinaryBulkStateVariantElement & state) const + DeserializeBinaryBulkStateVariantElement & variant_element_state) const { + auto discriminators_state = checkAndGetState(variant_element_state.discriminators_state); auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); auto & discriminators_data = discriminators.getData(); /// Reset state if we are reading from the start of the granule and not from the previous position in the file. if (!continuous_reading) - state.discriminators_state.remaining_rows_in_granule = 0; + discriminators_state->remaining_rows_in_granule = 0; /// Calculate our variant limit during discriminators deserialization. size_t variant_limit = 0; while (limit) { /// If we read all rows from current granule, start reading the next one. - if (state.discriminators_state.remaining_rows_in_granule == 0) + if (discriminators_state->remaining_rows_in_granule == 0) { if (stream->eof()) return variant_limit; - SerializationVariant::readDiscriminatorsGranuleStart(state.discriminators_state, stream); + SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream); } - size_t limit_in_granule = std::min(limit, state.discriminators_state.remaining_rows_in_granule); - if (state.discriminators_state.granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule); + if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) { auto & data = discriminators.getData(); - data.resize_fill(data.size() + limit_in_granule, state.discriminators_state.compact_discr); - if (state.discriminators_state.compact_discr == variant_discriminator) + data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr); + if (discriminators_state->compact_discr == variant_discriminator) variant_limit += limit_in_granule; } else @@ -267,7 +277,7 @@ size_t SerializationVariantElement::deserializeCompactDiscriminators( variant_limit += (discriminators_data[i] == variant_discriminator); } - state.discriminators_state.remaining_rows_in_granule -= limit_in_granule; + discriminators_state->remaining_rows_in_granule -= limit_in_granule; limit -= limit_in_granule; } @@ -336,7 +346,7 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first - /// convert our column to LowCardinality(Nullable()) and then use expand which will + /// cont our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). 
if (prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index e86d7ecefbe..f7779fb7b2d 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -69,7 +70,7 @@ private: size_t limit, ReadBuffer * stream, bool continuous_reading, - DeserializeBinaryBulkStateVariantElement & state) const; + DeserializeBinaryBulkStateVariantElement & variant_element_state) const; void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index bde52bb8096..ecef533d7e0 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -29,9 +29,10 @@ void SerializationWrapper::serializeBinaryBulkStateSuffix( void SerializationWrapper::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); } void SerializationWrapper::serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 6c5e2046062..882f17bba0a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp index fc7432d5bf6..c6337a31fce 100644 --- a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp +++ b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp @@ -49,7 +49,7 @@ TEST(SerializationObject, FromString) settings.position_independent_encoding = false; settings.getter = [&in](const auto &) { return &in; }; - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 8286b24d0a6..39915b0735e 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -93,7 +93,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co
ISerialization::DeserializeBinaryBulkStatePtr state; - serialization.deserializeBinaryBulkStatePrefix(settings, state); + serialization.deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); if (column->size() != rows) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index dba2bc1e56c..02a3f1b1165 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -116,7 +116,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd ISerialization::DeserializeBinaryBulkStatePtr state; auto serialization = type->getDefaultSerialization(); - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr); block.insert(ColumnWithTypeAndName(new_column, type, column.name)); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a22bff6b8d2..49548940dc2 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -195,7 +195,7 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter_for_prefix; ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; - serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix, nullptr); } SerializationPtr serialization; @@ -206,7 +206,7 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]); + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr); } catch (Exception & e) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 1fd886a6216..e9ff44306d2 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -100,9 +100,10 @@ void MergeTreeReaderWide::prefetchForAllColumns( try { auto & cache = caches[columns_to_read[pos].getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[columns_to_read[pos].getNameInStorage()]; prefetchForColumn( priority, columns_to_read[pos], serializations[pos], from_mark, continue_reading, - current_task_last_mark, cache); + current_task_last_mark, cache, deserialize_states_cache); } catch (Exception & e) { @@ -128,9 +129,6 @@ size_t MergeTreeReaderWide::readRows( size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); - if (deserialize_binary_bulk_state_map.empty()) - readPrefixes(num_columns, current_task_last_mark); - if (num_columns == 0) return max_rows_to_read; @@ -150,11 +148,12 @@ size_t MergeTreeReaderWide::readRows( { size_t column_size_before_reading = column->size(); auto & cache = caches[column_to_read.getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[column_to_read.getNameInStorage()]; readData( column_to_read, serializations[pos], column, from_mark, continue_reading, 
current_task_last_mark, - max_rows_to_read, cache, /* was_prefetched =*/ !prefetched_streams.empty()); + max_rows_to_read, cache, deserialize_states_cache, /* was_prefetched =*/ !prefetched_streams.empty()); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. @@ -293,30 +292,22 @@ static ReadBuffer * getStream( return stream.getDataBuffer(); } -void MergeTreeReaderWide::readPrefixes(size_t num_columns, size_t current_task_last_mark) -{ - for (size_t pos = 0; pos < num_columns; ++pos) - { - const auto & column_to_read = columns_to_read[pos]; - deserializePrefix(serializations[pos], column_to_read, current_task_last_mark); - } -} - void MergeTreeReaderWide::deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, - size_t current_task_last_mark) + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { const auto & name = name_and_type.name; if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; - ISerialization::SubstreamsCache empty_cache; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, empty_cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); }; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name], &deserialize_states_cache); } } @@ -327,8 +318,11 @@ void MergeTreeReaderWide::prefetchForColumn( size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); + serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); @@ -349,11 +343,15 @@ void MergeTreeReaderWide::prefetchForColumn( void MergeTreeReaderWide::readData( const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, size_t from_mark, bool continue_reading, size_t current_task_last_mark, - size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, bool was_prefetched) + size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, bool was_prefetched) { double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; + + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); + deserialize_settings.getter = [&](const 
ISerialization::SubstreamPath & substream_path) { bool seek_to_mark = !was_prefetched && !continue_reading; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 617e26864a9..edfd74ae3bc 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -52,7 +52,7 @@ private: void readData( const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, size_t from_mark, bool continue_reading, size_t current_task_last_mark, size_t max_rows_to_read, - ISerialization::SubstreamsCache & cache, bool was_prefetched); + ISerialization::SubstreamsCache & cache, ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, bool was_prefetched); /// Make next readData more simple by calling 'prefetch' of all related ReadBuffers (column streams). void prefetchForColumn( @@ -62,16 +62,18 @@ private: size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); - - void readPrefixes(size_t num_columns, size_t current_task_last_mark); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); void deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, - size_t current_task_last_mark); + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); std::unordered_map caches; + std::unordered_map deserialize_states_caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; }; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index b652750346f..3bdbddd61d6 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -254,7 +254,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu if (!deserialize_states.contains(name)) { settings.getter = create_stream_getter(true); - serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name], nullptr); } settings.getter = create_stream_getter(false); From b5a66167034f93248e2d420a37ad2fc8a40c87a4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 19 Apr 2024 19:19:49 +0000 Subject: [PATCH 043/439] Add state to cache --- src/DataTypes/Serializations/SerializationVariant.cpp | 1 + src/DataTypes/Serializations/SerializationVariantElement.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 81c8a1f9ffa..af41fb18cb4 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -153,6 +153,7 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( UInt64 mode; readBinaryLittleEndian(mode, *discriminators_stream); discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); } else { diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 19ac268268e..6ec41b3a3ef 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -72,6 +72,7 @@ void 
SerializationVariantElement::deserializeBinaryBulkStatePrefix( UInt64 mode; readBinaryLittleEndian(mode, *discriminators_stream); discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); } else { From 77e5e8f3555067a8d7324ca16725601e18b87164 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 19 Apr 2024 23:00:19 +0100 Subject: [PATCH 044/439] Fix special builds --- src/DataTypes/Serializations/SerializationVariant.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index a2f077964e6..5bcb89b9367 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -164,7 +164,7 @@ private: struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState { - DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) { } From 8cbe3a61caa49e3e59d7e91b2fa74d975ead0c2e Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 20 Apr 2024 09:10:36 +0000 Subject: [PATCH 045/439] Fix special builds --- src/DataTypes/Serializations/SerializationVariant.cpp | 4 ++-- src/DataTypes/Serializations/SerializationVariantElement.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index af41fb18cb4..f2b547cdd89 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -72,7 +72,7 @@ void SerializationVariant::enumerateStreams( struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { - SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) { } @@ -380,7 +380,7 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( else if (auto * discriminators_stream = settings.getter(settings.path)) { variant_state = checkAndGetState(state); - auto discriminators_state = checkAndGetState(variant_state->discriminators_state); + auto * discriminators_state = checkAndGetState(variant_state->discriminators_state); /// Deserialize discriminators according to serialization mode. if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 6ec41b3a3ef..2104d2bc53e 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -117,7 +117,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( else if (auto * discriminators_stream = settings.getter(settings.path)) { variant_element_state = checkAndGetState(state); - auto discriminators_state = checkAndGetState(variant_element_state->discriminators_state); + auto * discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. 
if (!variant_element_state->discriminators || result_column->empty()) @@ -241,7 +241,7 @@ size_t SerializationVariantElement::deserializeCompactDiscriminators( bool continuous_reading, DeserializeBinaryBulkStateVariantElement & variant_element_state) const { - auto discriminators_state = checkAndGetState(variant_element_state.discriminators_state); + auto * discriminators_state = checkAndGetState(variant_element_state.discriminators_state); auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); auto & discriminators_data = discriminators.getData(); From e56cf30995063f2930a941555cc05a5e54aad77a Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 20 Apr 2024 09:11:19 +0000 Subject: [PATCH 046/439] Fix typo in comment --- src/DataTypes/Serializations/SerializationVariantElement.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 2104d2bc53e..b56e4125612 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -347,7 +347,7 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first - /// cont our column to LowCardinality(Nullable()) and then use expand which will + /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). if (prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); From 87f8c680531cbccc3aff039507153c09c6c0b0e1 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 20 Apr 2024 10:24:02 +0000 Subject: [PATCH 047/439] Reduce code duplication --- .../Serializations/SerializationVariant.cpp | 45 ++++++++++--------- .../Serializations/SerializationVariant.h | 4 ++ .../SerializationVariantElement.cpp | 22 +-------- 3 files changed, 31 insertions(+), 40 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index f2b547cdd89..1297ce15eae 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -141,27 +141,9 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - settings.path.push_back(Substream::VariantDiscriminators); - - DeserializeBinaryBulkStatePtr discriminators_state; - if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) - { - discriminators_state = cached_state; - } - else if (auto * discriminators_stream = settings.getter(settings.path)) - { - UInt64 mode; - readBinaryLittleEndian(mode, *discriminators_stream); - discriminators_state = std::make_shared(mode); - addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); - } - else - { - settings.path.pop_back(); + DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) return; - } - - settings.path.pop_back(); auto variant_state = std::make_shared(); variant_state->discriminators_state = discriminators_state; @@ -179,6 +161,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( 
state = std::move(variant_state); } +ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) const +{ + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStatePtr discriminators_state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); + } + + settings.path.pop_back(); + return discriminators_state; +} + void SerializationVariant::serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 5bcb89b9367..ae7216e2223 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -176,6 +176,10 @@ private: ColumnVariant::Discriminator compact_discr = 0; }; + static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) const; + std::vector deserializeCompactDiscriminators( ColumnPtr & discriminators_column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index b56e4125612..86c6cb4982d 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -60,27 +60,9 @@ struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : p void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - settings.path.push_back(Substream::VariantDiscriminators); - - DeserializeBinaryBulkStatePtr discriminators_state; - if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) - { - discriminators_state = cached_state; - } - else if (auto * discriminators_stream = settings.getter(settings.path)) - { - UInt64 mode; - readBinaryLittleEndian(mode, *discriminators_stream); - discriminators_state = std::make_shared(mode); - addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); - } - else - { - settings.path.pop_back(); + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) return; - } - - settings.path.pop_back(); auto variant_element_state = std::make_shared(); variant_element_state->discriminators_state = discriminators_state; From 7445e51f7394fa48328098ecc9ce9f1aea239841 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 20 Apr 2024 11:56:25 +0100 Subject: [PATCH 048/439] Fix build --- src/DataTypes/Serializations/SerializationVariant.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index ae7216e2223..7c9c3042078 100644 --- 
a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -178,7 +178,7 @@ private: static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( DeserializeBinaryBulkSettings & settings, - SubstreamsDeserializeStatesCache * cache) const; + SubstreamsDeserializeStatesCache * cache); std::vector deserializeCompactDiscriminators( ColumnPtr & discriminators_column, From 8c5c54ba71f973e012aa59087805eab2364afa8e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 20 Apr 2024 13:09:54 +0100 Subject: [PATCH 049/439] Don't mark static method as const --- src/DataTypes/Serializations/SerializationVariant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 1297ce15eae..ea8285ac226 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -163,7 +163,7 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( DeserializeBinaryBulkSettings & settings, - SubstreamsDeserializeStatesCache * cache) const + SubstreamsDeserializeStatesCache * cache) { settings.path.push_back(Substream::VariantDiscriminators); From fb4a230eee1051cd17d627c370eb1ffe318f2dd5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 30 Apr 2024 10:44:25 +0200 Subject: [PATCH 050/439] Support reading partitioed DeltaLake columns --- src/Processors/Chunk.cpp | 4 +- .../DataLakes/DeltaLakeMetadataParser.cpp | 265 ++++++++++++++++-- .../DataLakes/DeltaLakeMetadataParser.h | 13 +- src/Storages/DataLakes/HudiMetadataParser.cpp | 19 +- src/Storages/DataLakes/HudiMetadataParser.h | 11 +- src/Storages/DataLakes/IStorageDataLake.h | 112 +++++--- .../DataLakes/Iceberg/StorageIceberg.cpp | 8 +- .../DataLakes/Iceberg/StorageIceberg.h | 4 +- src/Storages/DataLakes/PartitionColumns.h | 17 ++ src/Storages/DataLakes/S3MetadataReader.cpp | 7 +- src/Storages/StorageS3.cpp | 53 +++- src/Storages/StorageS3.h | 9 +- src/TableFunctions/ITableFunction.cpp | 2 +- src/TableFunctions/ITableFunctionDataLake.h | 15 +- src/TableFunctions/TableFunctionS3.cpp | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 2 +- tests/integration/test_storage_delta/test.py | 95 +++++++ 17 files changed, 534 insertions(+), 104 deletions(-) create mode 100644 src/Storages/DataLakes/PartitionColumns.h diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..8e3ca0b03b3 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::addColumn(), max position = {}", - position, columns.size() - 1); + position, columns.size() ? columns.size() - 1 : 0); if (empty()) num_rows = column->size(); else if (column->size() != num_rows) @@ -143,7 +143,7 @@ void Chunk::erase(size_t position) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", - toString(position), toString(columns.size() - 1)); + toString(position), toString(columns.size() ? 
columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 14a912a180d..50b6ca83cb4 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -17,6 +17,22 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace fs = std::filesystem; @@ -26,6 +42,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; } @@ -65,9 +82,17 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles(const Configuration & configuration, ContextPtr context) { std::set result_files; + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); if (checkpoint_version) @@ -81,7 +106,7 @@ struct DeltaLakeMetadataParser::Impl if (!MetadataReadHelper::exists(file_path, configuration)) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files, current_schema, current_partition_columns, configuration, context); } LOG_TRACE( @@ -94,10 +119,10 @@ struct DeltaLakeMetadataParser::Impl configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files, current_schema, current_partition_columns, configuration, context); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -132,6 +157,8 @@ struct DeltaLakeMetadataParser::Impl void processMetadataFile( const String & key, std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, const Configuration & configuration, ContextPtr context) { @@ -153,20 +180,217 @@ struct DeltaLakeMetadataParser::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + if (object->has("add")) { - const auto path = json["add"]["path"].getString(); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); result.insert(fs::path(configuration.getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & name : partition_values->getNames()) + { + const auto value = 
partition_values->getValue(name); + auto name_and_type = file_schema.tryGetByName(name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", name, value, filename); + } + } + } } - else if (json.has("remove")) + else if (object->has("remove")) { - const auto path = json["remove"]["path"].getString(); + auto path = object->get("remove").extract()->getValue("path"); result.erase(fs::path(configuration.getPath()) / path); } + if (file_schema.empty()) + { + // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + // object->stringify(oss); + // LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + file_schema.push_back({name, getFieldType(field, "type", is_nullable)}); + } + } + } + /// TODO: Check if schema in each file is the same? } } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? 
makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + // else if (which.isDecimal32()) + // return parse(value); + // else if (which.isDecimal64()) + // return parse(value); + // else if (which.isDecimal128()) + // return parse(value); + // else if (which.isDecimal256()) + // return parse(value); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type 
= getFieldType(type, "elementType", is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -272,8 +496,8 @@ struct DeltaLakeMetadataParser::Impl arrow::default_memory_pool(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); + std::shared_ptr file_schema; + THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); ArrowColumnToCHColumn column_reader( header, "Parquet", @@ -318,20 +542,19 @@ struct DeltaLakeMetadataParser::Impl template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) -{ -} - -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) +DeltaLakeMetadataParser::DeltaLakeMetadataParser(const Configuration & configuration, ContextPtr context) + : impl(std::make_unique()) { auto result = impl->processMetadataFiles(configuration, context); - return Strings(result.begin(), result.end()); + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; + + LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); +template DeltaLakeMetadataParser::DeltaLakeMetadataParser(const StorageS3::Configuration & configuration, ContextPtr context); } #endif diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index df7276b90b4..58cf7acd2a3 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include "PartitionColumns.h" namespace DB { @@ -10,13 +12,20 @@ template struct DeltaLakeMetadataParser { public: - DeltaLakeMetadataParser(); + DeltaLakeMetadataParser(const Configuration & configuration, ContextPtr context); - Strings getFiles(const Configuration & configuration, ContextPtr context); + Strings getFiles() { return data_files; } + + NamesAndTypesList getTableSchema() const { return schema; } + + DataLakePartitionColumns getPartitionColumns() const { return partition_columns; } private: struct Impl; std::shared_ptr impl; + NamesAndTypesList schema; + DataLakePartitionColumns partition_columns; + Strings data_files; }; } diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index 699dfe8fda0..ad66ba69258 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -61,7 +61,7 @@ struct HudiMetadataParser::Impl String key; UInt64 timestamp = 0; }; - std::unordered_map> data_files; + std::unordered_map> files; for (const auto & key : keys) { @@ -76,7 +76,7 @@ struct HudiMetadataParser::Impl const auto & file_id = file_parts[0]; const auto timestamp = 
parse(file_parts[2]); - auto & file_info = data_files[partition][file_id]; + auto & file_info = files[partition][file_id]; if (file_info.timestamp == 0 || file_info.timestamp < timestamp) { file_info.key = std::move(key); @@ -85,7 +85,7 @@ struct HudiMetadataParser::Impl } Strings result; - for (auto & [partition, partition_data] : data_files) + for (auto & [partition, partition_data] : files) { LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); for (auto & [file_id, file_data] : partition_data) @@ -97,19 +97,12 @@ struct HudiMetadataParser::Impl template -HudiMetadataParser::HudiMetadataParser() : impl(std::make_unique()) +HudiMetadataParser::HudiMetadataParser(const Configuration & configuration, ContextPtr) : impl(std::make_unique()) { + data_files = impl->processMetadataFiles(configuration); } -template -Strings HudiMetadataParser::getFiles(const Configuration & configuration, ContextPtr) -{ - return impl->processMetadataFiles(configuration); -} - -template HudiMetadataParser::HudiMetadataParser(); -template Strings HudiMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); +template HudiMetadataParser::HudiMetadataParser(const StorageS3::Configuration & configuration, ContextPtr); } diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 6727ba2f718..9e2901a9d24 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include "PartitionColumns.h" namespace DB { @@ -10,13 +12,18 @@ template struct HudiMetadataParser { public: - HudiMetadataParser(); + HudiMetadataParser(const Configuration & configuration, ContextPtr context); - Strings getFiles(const Configuration & configuration, ContextPtr context); + Strings getFiles() { return data_files; } + + NamesAndTypesList getTableSchema() const { return {}; } + + DataLakePartitionColumns getPartitionColumns() const { return {}; } private: struct Impl; std::shared_ptr impl; + Strings data_files; }; } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 711abbde38c..be23c017043 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -9,6 +9,7 @@ #include #include #include +#include "PartitionColumns.h" #include @@ -23,15 +24,51 @@ public: using Configuration = typename Storage::Configuration; template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward(args)...) - , base_configuration(configuration_) - , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + static StoragePtr create( + const Configuration & configuration_, + ContextPtr context_, + LoadingStrictnessLevel mode, + const ColumnsDescription & columns_, + Args && ...args) + { + std::unique_ptr metadata; + Configuration read_configuration; + Configuration base_configuration{configuration_}; + try + { + base_configuration.update(context_); + metadata = std::make_unique(base_configuration, context_); + read_configuration = getConfigurationForDataRead(*metadata, base_configuration, context_); + } + catch (...) 
+ { + if (mode <= LoadingStrictnessLevel::CREATE) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + return std::make_shared>( + configuration_, + read_configuration, + context_, + columns_.empty() && metadata ? ColumnsDescription(metadata->getTableSchema()) : columns_, + std::forward(args)...); + } template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args) + explicit IStorageDataLake( + const Configuration & base_configuration_, + const Configuration & read_configuration_, + ContextPtr context_, + const ColumnsDescription & columns_, + Args && ...args) + : Storage(read_configuration_, + context_, + columns_, + std::forward(args)...) + , base_configuration(base_configuration_) + , log(getLogger(getName())) // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) { - return std::make_shared>(configuration_, context_, mode, std::forward(args)...); } String getName() const override { return name; } @@ -41,8 +78,18 @@ public: const std::optional & format_settings, const ContextPtr & local_context) { - auto configuration = getConfigurationForDataRead(base_configuration, local_context); - return Storage::getTableStructureFromData(configuration, format_settings, local_context); + base_configuration.update(local_context); + auto metadata = std::make_unique(base_configuration, local_context); + auto schema = metadata->getTableSchema(); + if (!schema.empty()) + { + return ColumnsDescription(schema); + } + else + { + auto read_configuration = getConfigurationForDataRead(*metadata, base_configuration, local_context); + return Storage::getTableStructureFromData(read_configuration, format_settings, local_context); + } } static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context) @@ -65,51 +112,31 @@ public: private: static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {}, - LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE) + MetadataParser & metadata_, + const Configuration & base_configuration, + const ContextPtr & local_context) { auto configuration{base_configuration}; configuration.update(local_context); configuration.static_configuration = true; - - try - { - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - getLogger("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; - } - catch (...) 
- { - if (mode <= LoadingStrictnessLevel::CREATE) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - return configuration; - } - } - - static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context) - { - return MetadataParser().getFiles(configuration, local_context); + configuration.keys = metadata_.getFiles(); + configuration.connect(local_context); + return configuration; } void updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); - auto new_keys = getDataFiles(base_configuration, local_context); + + auto metadata = MetadataParser(base_configuration, local_context); + auto new_keys = metadata.getFiles(); + Storage::partition_columns = metadata.getPartitionColumns(); if (!updated && new_keys == Storage::getConfiguration().keys) return; - Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); + auto read_configuration = getConfigurationForDataRead(metadata, base_configuration, local_context); + Storage::useConfiguration(read_configuration); } Configuration base_configuration; @@ -127,8 +154,9 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) if (configuration.format == "auto") configuration.format = "Parquet"; - return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints, - args.comment, getFormatSettings(args.getContext())); + return DataLake::create(configuration, args.getContext(), args.mode, + args.columns, args.table_id, args.constraints, + args.comment, getFormatSettings(args.getContext())); } } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 19cd97c3d4f..6557b67d20c 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -9,8 +9,8 @@ StoragePtr StorageIceberg::create( const DB::StorageIceberg::Configuration & base_configuration, DB::ContextPtr context_, LoadingStrictnessLevel mode, - const DB::StorageID & table_id_, const DB::ColumnsDescription & columns_, + const DB::StorageID & table_id_, const DB::ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_) @@ -36,8 +36,8 @@ StoragePtr StorageIceberg::create( std::move(metadata), configuration, context_, - table_id_, columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, + table_id_, constraints_, comment, format_settings_); @@ -47,12 +47,12 @@ StorageIceberg::StorageIceberg( std::unique_ptr metadata_, const Configuration & configuration_, ContextPtr context_, - const StorageID & table_id_, const ColumnsDescription & columns_, + const StorageID & table_id_, const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_) - : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) + : StorageS3(configuration_, context_, columns_, table_id_, constraints_, comment, format_settings_) , current_metadata(std::move(metadata_)) , base_configuration(configuration_) { diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index 45cbef0b41b..a95e203aa72 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -31,8 +31,8 @@ public: static StoragePtr create(const Configuration & base_configuration, ContextPtr context_, LoadingStrictnessLevel mode, - const StorageID & table_id_, const ColumnsDescription & columns_, + const StorageID & table_id_, const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_); @@ -41,8 +41,8 @@ public: std::unique_ptr metadata_, const Configuration & configuration_, ContextPtr context_, - const StorageID & table_id_, const ColumnsDescription & columns_, + const StorageID & table_id_, const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_); diff --git a/src/Storages/DataLakes/PartitionColumns.h b/src/Storages/DataLakes/PartitionColumns.h new file mode 100644 index 00000000000..604dbbf78fa --- /dev/null +++ b/src/Storages/DataLakes/PartitionColumns.h @@ -0,0 +1,17 @@ +#pragma once +#include +#include + +namespace DB +{ + +struct DataLakePartitionColumn +{ + NameAndTypePair name_and_type; + Field value; +}; + +/// Data file -> partition columns +using DataLakePartitionColumns = std::unordered_map>; + +} diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index d66e21550a3..6a00ae1e452 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -45,12 +45,16 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( const auto & bucket = base_configuration.url.bucket; const auto & client = base_configuration.client; + auto path = std::filesystem::path(table_path) / prefix; + std::vector res; S3::ListObjectsV2Request request; Aws::S3::Model::ListObjectsV2Outcome outcome; request.SetBucket(bucket); - request.SetPrefix(std::filesystem::path(table_path) / prefix); + request.SetPrefix(path); + + LOG_TEST(getLogger("S3DataLakeMetadataReadHelper"), "Listing files in {}", path.string()); bool is_finished{false}; while (!is_finished) @@ -69,6 +73,7 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( for (const auto & obj : result_batch) { const auto & filename = obj.GetKey(); + LOG_TEST(getLogger("S3DataLakeMetadataReadHelper"), "Listed file: {} (searching for suffix: {})", filename, suffix); if (filename.ends_with(suffix)) res.push_back(filename); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 0371a9de08a..205b5d3381e 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -148,20 +148,23 @@ public: const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, 
Block sample_block, + const StorageS3::Configuration & query_configuration_, StorageS3 & storage_, ReadFromFormatInfo read_from_format_info_, bool need_only_count_, size_t max_block_size_, - size_t num_streams_) + size_t num_streams_, + const DataLakePartitionColumns & partition_columns_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , column_names(column_names_) , storage(storage_) , read_from_format_info(std::move(read_from_format_info_)) , need_only_count(need_only_count_) + , query_configuration(query_configuration_) + , partition_columns(partition_columns_) , max_block_size(max_block_size_) , num_streams(num_streams_) { - query_configuration = storage.updateConfigurationAndGetCopy(context); virtual_columns = storage.getVirtualsList(); } @@ -172,6 +175,7 @@ private: bool need_only_count; StorageS3::Configuration query_configuration; NamesAndTypesList virtual_columns; + DataLakePartitionColumns partition_columns; size_t max_block_size; size_t num_streams; @@ -577,7 +581,8 @@ StorageS3Source::StorageS3Source( const String & url_host_and_port_, std::shared_ptr file_iterator_, const size_t max_parsing_threads_, - bool need_only_count_) + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -593,6 +598,7 @@ StorageS3Source::StorageS3Source( , client(client_) , sample_block(info.format_header) , format_settings(format_settings_) + , partition_columns(partition_columns_) , requested_virtual_columns(info.requested_virtual_columns) , file_iterator(file_iterator_) , max_parsing_threads(max_parsing_threads_) @@ -798,8 +804,37 @@ Chunk StorageS3Source::generate() size_t chunk_size = 0; if (const auto * input_format = reader.getInputFormat()) chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize()); + + if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) + { + auto filename = fs::path(reader.getPath()).filename().string(); + auto partition_values = partition_columns.find(filename); + + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!sample_block.has(name_and_type.name)) + continue; + + auto column_pos = sample_block.getPositionByName(name_and_type.name); + + const auto & type = name_and_type.type; + auto partition_column = type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + /// Add correct values. 
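+                /// If other columns remain, put the materialized partition value back at the same position;
+                /// if the erased column was the only one, the chunk is now empty and the column is simply appended.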
+ if (chunk.hasColumns()) + { + chunk.addColumn(column_pos, std::move(partition_column)); + } + else + { + chunk.addColumn(std::move(partition_column)); + } + } + } return chunk; } @@ -1072,8 +1107,8 @@ private: StorageS3::StorageS3( const Configuration & configuration_, const ContextPtr & context_, - const StorageID & table_id_, const ColumnsDescription & columns_, + const StorageID & table_id_, const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, @@ -1190,17 +1225,20 @@ void StorageS3::read( bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; + auto query_configuration = updateConfigurationAndGetCopy(local_context); auto reading = std::make_unique( column_names, query_info, storage_snapshot, local_context, read_from_format_info.source_header, + query_configuration, *this, std::move(read_from_format_info), need_only_count, max_block_size, - num_streams); + num_streams, + partition_columns); query_plan.addStep(std::move(reading)); } @@ -1262,7 +1300,8 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), iterator_wrapper, max_parsing_threads, - need_only_count); + need_only_count, + partition_columns); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -1973,8 +2012,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) return std::make_shared( std::move(configuration), args.getContext(), - args.table_id, args.columns, + args.table_id, args.constraints, args.comment, format_settings, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index c8ab28fb20e..9e1d9eb5aad 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -141,7 +142,8 @@ public: const String & url_host_and_port, std::shared_ptr file_iterator_, size_t max_parsing_threads, - bool need_only_count_); + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_ = {}); ~StorageS3Source() override; @@ -170,6 +172,7 @@ private: std::shared_ptr client; Block sample_block; std::optional format_settings; + DataLakePartitionColumns partition_columns; struct ReaderHolder { @@ -305,8 +308,8 @@ public: StorageS3( const Configuration & configuration_, const ContextPtr & context_, - const StorageID & table_id_, const ColumnsDescription & columns_, + const StorageID & table_id_, const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, @@ -363,6 +366,8 @@ protected: const Configuration & getConfiguration(); + mutable DataLakePartitionColumns partition_columns; + private: friend class StorageS3Cluster; friend class TableFunctionS3Cluster; diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 137e1dc27fe..e5676c5c25d 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -36,7 +36,7 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte if (cached_columns.empty()) return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + if (hasStaticStructure() && cached_columns == 
getActualTableStructure(context, is_insert_query)) return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 91165ba6705..1d946d9b5fa 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -26,21 +26,28 @@ protected: const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/, + ColumnsDescription cached_columns, bool /*is_insert_query*/) const override { ColumnsDescription columns; if (TableFunction::configuration.structure != "auto") columns = parseColumnsListFromString(TableFunction::configuration.structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; StoragePtr storage = Storage::create( - TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, StorageID(TableFunction::getDatabaseName(), table_name), - columns, ConstraintsDescription{}, String{}, std::nullopt); + TableFunction::configuration, context, LoadingStrictnessLevel::CREATE, + columns, StorageID(TableFunction::getDatabaseName(), table_name), + ConstraintsDescription{}, String{}, std::nullopt); storage->startup(); return storage; } + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + const char * getStorageTypeName() const override { return Storage::name; } ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override @@ -60,6 +67,8 @@ protected: TableFunction::configuration.format = "Parquet"; TableFunction::parseArguments(ast_function, context); } + + ColumnsDescription structure_hint; }; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index a8c100ebd44..93e43fb17ac 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -420,8 +420,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context StoragePtr storage = std::make_shared( configuration, context, - StorageID(getDatabaseName(), table_name), columns, + StorageID(getDatabaseName(), table_name), ConstraintsDescription{}, String{}, /// No format_settings for table function S3 diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index e727c4e4c89..3f8ed72479b 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -37,8 +37,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( storage = std::make_shared( configuration, context, - StorageID(getDatabaseName(), table_name), columns, + StorageID(getDatabaseName(), table_name), ConstraintsDescription{}, /* comment */String{}, /* format_settings */std::nullopt, /// No format_settings for S3Cluster diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 25f0b58e0f5..c6bb8fd8d69 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -511,3 +511,98 @@ def test_restart_broken_table_function(started_cluster): upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + +def 
test_partition_columns(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_partition_columns" + result_file = f"{TABLE_NAME}" + partition_column = "c" + + delta_table = ( + DeltaTable.create(spark) + .tableName(TABLE_NAME) + .location(f"/{result_file}") + .addColumn("a", "INT") + .addColumn("b", "STRING") + .addColumn("c", "DATE") + .partitionedBy(partition_column) + .execute() + ) + num_rows = 9 + + schema = StructType( + [ + StructField("a", IntegerType()), + StructField("b", StringType()), + StructField("c", DateType()), + ] + ) + + for i in range(1, num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_column).save( + f"/{TABLE_NAME}" + ) + + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + assert len(files) > 0 + print(f"Uploaded files: {files}") + + result = instance.query( + f"describe table deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')" + ).strip() + + assert ( + result + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date)" + ) + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + WHERE c == toDateTime('2000/01/05') + """ + ) + ) + assert result == 1 + + # instance.query( + # f""" + # DROP TABLE IF EXISTS {TABLE_NAME}; + # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) + # ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + # ) + # assert ( + # int( + # instance.query( + # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" + # ) + # ) + # == num_rows - 1 + # ) + # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") + # assert False From 9ed3acce8223485798b22bcffcd7bd8d595cd025 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 10 May 2024 18:22:23 +0000 Subject: [PATCH 051/439] refactoring near azure blob storage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 96 +++--- src/Backups/BackupIO_AzureBlobStorage.h | 15 +- .../registerBackupEngineAzureBlobStorage.cpp | 66 ++-- src/Core/Settings.h | 31 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 4 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- .../AzureBlobStorage/AzureBlobStorageAuth.h | 58 ---- ...ageAuth.cpp => AzureBlobStorageCommon.cpp} | 276 +++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 139 ++++++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 37 ++- .../AzureBlobStorage/AzureObjectStorage.h | 75 +---- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../ObjectStorages/ObjectStorageFactory.cpp | 17 +- .../copyAzureBlobStorageFile.cpp | 9 +- .../copyAzureBlobStorageFile.h | 4 +- src/Storages/StorageAzureBlob.cpp | 308 +++++------------- 
src/Storages/StorageAzureBlob.h | 26 +- src/Storages/StorageAzureBlobCluster.cpp | 5 +- .../TableFunctionAzureBlobStorage.cpp | 65 ++-- .../TableFunctionAzureBlobStorageCluster.cpp | 17 +- 21 files changed, 604 insertions(+), 650 deletions(-) delete mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h rename src/Disks/ObjectStorages/AzureBlobStorage/{AzureBlobStorageAuth.cpp => AzureBlobStorageCommon.cpp} (53%) create mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a3998431674..3f60ed5c0b4 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,21 +31,21 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -53,20 +54,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -81,23 +82,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from 
AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -115,22 +116,25 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -144,18 +148,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. 
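    /// A two-element blob path holds {object key, container name} (used below as src_blob_path[0] and src_blob_path[1]), which is what allows the native copy.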
- if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, - /* src_container */ blob_path[1], - /* src_path */ blob_path[0], + /* src_container */ src_blob_path[1], + /* src_path */ src_blob_path[0], start_pos, length, - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); @@ -173,11 +177,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St copyAzureBlobStorageFile( client, client, - configuration.container, - fs::path(configuration.blob_path)/ source, + connection_params.endpoint.container_name, + fs::path(blob_path)/ source, 0, size, - /* dest_container */ configuration.container, + /* dest_container */ connection_params.endpoint.container_name, /* dest_path */ destination, settings, read_settings, @@ -186,21 +190,29 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, + start_pos, + length, + client, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, + settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), + "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -210,7 +222,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -218,7 +230,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, @@ -230,7 +242,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void 
BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -239,7 +251,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -249,7 +261,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index f0b9aace4d4..0829c3258c9 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,5 +1,6 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -16,7 +17,7 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -29,15 +30,16 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + BackupWriterAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -58,9 +60,10 @@ private: void removeFilesBatch(const Strings & file_names); const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 1b9545fc455..6974d16e2f6 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ 
-1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureBlob::Configuration configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,54 +62,41 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.blob_path = args[0].safeGet(); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = 
AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -116,12 +106,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + if (hasRegisteredArchiveFileExtension(blob_path)) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -135,7 +125,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(configuration, + auto reader = std::make_shared( + connection_params, + blob_path, params.read_settings, params.write_settings, params.context); @@ -150,7 +142,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } else { - auto writer = std::make_shared(configuration, + auto writer = std::make_shared( + connection_params, + blob_path, params.read_settings, params.write_settings, params.context, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 04029983d84..e09fef794d7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -79,23 +79,13 @@ class IColumn; M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ - M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ - M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ - M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ - M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, 
"The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ - M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ - M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ - M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ - M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ @@ -105,20 +95,33 @@ class IColumn; M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`", 0) \ M(UInt64, s3_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_use_adaptive_timeouts, true, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ - M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ - M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ - M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ - M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \
     M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
     M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
     M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \
+    M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \
+    M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \
+    M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \
+    M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \
+    M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \
+    M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \
+    M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
+    M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \
+    M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \
+    M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \
+    M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \
+    M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \
+    M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \
+    M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \
+    M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \
+    M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \
+    M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \
     M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
     M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
     M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. 
The value can be overwritten per table with table settings", 0) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 2c90e3a9003..cadae33e23e 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -26,7 +26,7 @@ struct WriteBufferFromAzureBlobStorage::PartData std::string block_id; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -44,7 +44,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 96ba6acefff..f47ba92beab 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp similarity index 53% rename from src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a535b007541..76054efff19 100644 --- 
a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,4 +1,8 @@ -#include +#include +#include +#include +#include +#include #if USE_AZURE_BLOB_STORAGE @@ -7,13 +11,9 @@ #include #include #include -#include #include #include -using namespace Azure::Storage::Blobs; - - namespace DB { @@ -22,8 +22,10 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace AzureBlobStorage +{ -void validateStorageAccountUrl(const String & storage_account_url) +static void validateStorageAccountUrl(const String & storage_account_url) { const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); @@ -33,8 +35,7 @@ void validateStorageAccountUrl(const String & storage_account_url) "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); } - -void validateContainerName(const String & container_name) +static void validateContainerName(const String & container_name) { auto len = container_name.length(); if (len < 3 || len > 64) @@ -50,13 +51,51 @@ void validateContainerName(const String & container_name) container_name_pattern_str, container_name); } +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { String storage_url; String account_name; String container_name; String prefix; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -71,48 +110,48 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (endpoint_contains_account_name) { - size_t acc_pos_begin = endpoint.find('/', pos+2); + size_t acc_pos_begin = endpoint.find('/', pos + 2); if (acc_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 
1); if (acc_pos_end == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); - size_t cont_pos_end = endpoint.find('/', acc_pos_end+1); + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(acc_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1); } } else { - size_t cont_pos_begin = endpoint.find('/', pos+2); + size_t cont_pos_begin = endpoint.find('/', pos + 2); if (cont_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(cont_pos_begin+1); + container_name = endpoint.substr(cont_pos_begin + 1); } } } @@ -132,122 +171,117 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (!container_name.empty()) validateContainerName(container_name); + std::optional container_already_exists {}; if (config.has(config_prefix + ".container_already_exists")) container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; + + return {storage_url, account_name, container_name, prefix, "", container_already_exists}; } - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) { - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} + endpoint.container_name = container_name; -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if (config.has(config_prefix + ".connection_string")) - 
connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + if (isConnectionString(url)) { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; } - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); - } + size_t pos = url.find('?'); - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method = std::make_shared(); + } } -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) { - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. 
Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); try { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); } catch (const Azure::Storage::StorageException & e) { /// If container_already_exists is not set (in config), ignore already exists error. /// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); throw; } } -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - std::unique_ptr settings = std::make_unique(); + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = 
std::make_shared(curl_options); + client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings_ptr = std::make_unique(); + + settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings_ptr; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); @@ -262,10 +296,28 @@ std::unique_ptr getAzureBlobStorageSettings(const Po settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); + settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); + settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); + settings->sdk_retry_max_backoff_ms = config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. 
Expected one of 'ipv4' or 'ipv6'", value); + } return settings; } } +} + #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..7e716adf4d0 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include "base/strong_typedef.h" +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + uint64_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + int list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + bool use_native_copy = false; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; + + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" 
+ sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 36225b13ee8..d0f39beb3ca 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -105,7 +105,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_) : name(name_) @@ -397,20 +397,37 @@ void AzureObjectStorage::copyObject( /// NOLINT void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + settings.set(std::move(new_settings)); - /// We don't update client + client.set(std::move(new_client)); } std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), 
+ .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), object_namespace); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f52ab803012..d8440453852 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,4 +1,5 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,6 +8,7 @@ #include #include #include +#include namespace Poco { @@ -16,70 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & 
object_namespace_); @@ -156,12 +103,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override{ return client.get(); } private: using SharedAzureClientPtr = std::shared_ptr; @@ -169,8 +112,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix LoggerPtr log; diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 961c2709efc..60818933dec 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -120,7 +120,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index eae31af9d44..27a58053752 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -238,7 +238,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 7b949db268b..f8c1c564191 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -13,7 +13,7 @@ #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include @@ -293,12 +293,19 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? 
params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix); }; factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..d648796b5df 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -44,7 +44,7 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -69,7 +69,7 @@ namespace size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -265,7 +265,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -282,11 +282,10 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 6ad54923ab5..73b91191b96 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -29,7 +29,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -46,7 +46,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f2e2833dad4..d8fd5cbf05a 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,6 @@ +#include #include +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -95,43 +97,62 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); } -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) +void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context) { 
validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } + connection_url = collection.get("connection_string"); + else if (collection.has("storage_account_url")) + connection_url = collection.get("storage_account_url"); - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); + container_name = collection.get("container"); configuration.blob_path = collection.get("blob_path"); if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); + account_name = collection.get("account_name"); if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); + account_key = collection.get("account_key"); configuration.structure = collection.getOrDefault("structure", "auto"); configuration.format = collection.getOrDefault("format", configuration.format); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + configuration.connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, local_context); } +AzureBlobStorage::ConnectionParams StorageAzureBlob::getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const ContextPtr & local_context) +{ + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); + + if (account_name && account_key) + { + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + else + { + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + + return connection_params; +} StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { @@ -144,8 +165,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - processNamedCollectionResult(configuration, *named_collection); - + processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") @@ -164,11 +184,12 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); + String 
connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blob_path"); - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; auto is_format_arg = [] (const std::string & s) -> bool { @@ -198,8 +219,8 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -211,12 +232,13 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; } } @@ -229,17 +251,20 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } } configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -247,18 +272,6 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine return configuration; } - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - void registerStorageAzureBlob(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) @@ -268,7 +281,8 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = 
StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, /*readonly=*/ false); + // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. @@ -299,11 +313,11 @@ void registerStorageAzureBlob(StorageFactory & factory) if (args.storage_def->partition_by) partition_by = args.storage_def->partition_by->clone(); - auto settings = StorageAzureBlob::createSettings(args.getContext()); + auto azure_settings = AzureBlobStorage::getRequestSettings(args.getContext()->getSettingsRef()); return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), + configuration, + std::make_unique("AzureBlobStorage", std::move(client), std::move(azure_settings), configuration.connection_params.getContainer()), args.getContext(), args.table_id, args.columns, @@ -321,177 +335,6 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto 
workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - StorageAzureBlob::StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, @@ -513,7 +356,8 @@ StorageAzureBlob::StorageAzureBlob( { if (configuration.format != "auto") FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); + + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -850,13 +694,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) { /// Iterate through disclosed globs and make a source for each file 
iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blobs_paths, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } } @@ -879,8 +723,8 @@ void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, cons max_block_size, configuration.compression_method, storage->object_storage.get(), - configuration.container, - configuration.connection_url, + configuration.connection_params.getContainer(), + configuration.connection_params.endpoint.storage_account_url, iterator_wrapper, need_only_count)); } @@ -1455,7 +1299,8 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1466,7 +1311,8 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1477,7 +1323,9 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); @@ -1520,8 +1368,10 @@ namespace return std::nullopt; }; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); String source = host_and_bucket + '/' + it->relative_path; + if (format) { auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); @@ -1573,12 +1423,12 @@ std::pair StorageAzureBlob::getTableStructureAndForm if (configuration.withGlobs()) { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), 
configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); } else { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 20e7f4a6c90..affa02928b6 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,5 +1,7 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -37,21 +39,13 @@ public: bool withWildcard() const { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + static constexpr auto PARTITION_ID_WILDCARD = "{_partition_id}"; return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; } - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; + AzureBlobStorage::ConnectionParams connection_params; }; StorageAzureBlob( @@ -67,16 +61,10 @@ public: ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); + static AzureBlobStorage::ConnectionParams getConnectionParams(const String & connection_url, const String & container_name, const std::optional & account_name, const std::optional & account_key, const ContextPtr & local_context); + static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context); - static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } + String getName() const override { return name; } void read( QueryPlan & query_plan, diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a80d121567a..6f6ae8763fd 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -1,4 +1,5 @@ #include "Storages/StorageAzureBlobCluster.h" +#include #include "config.h" @@ -41,7 +42,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( , configuration{configuration_} , object_storage(std::move(object_storage_)) { - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration_.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -79,7 +80,7 @@ void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, cons RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = 
std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, + object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, getVirtualsList(), context, nullptr); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 275cd2a9cbb..c471a72d8c7 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -35,16 +35,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { /// Supported signatures: @@ -54,7 +44,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); + StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; @@ -74,14 +64,14 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container = checkAndGetLiteralArgument(engine_args[1], "container"); configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; + std::optional account_name; + std::optional account_key; + + auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -105,8 +95,8 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -120,8 +110,9 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); if (is_format_arg(sixth_arg)) configuration.format = sixth_arg; @@ -132,28 +123,33 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - 
configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } else if (engine_args.size() == 8) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = StorageAzureBlob::getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -330,12 +326,19 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + + auto object_storage = std::make_unique( + "AzureBlobStorageTableFunction", + std::move(client), + std::move(settings), + configuration.connection_params.getContainer()); + if (configuration.format == "auto") return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); } @@ -354,8 +357,8 @@ std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBefo StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); ColumnsDescription columns; if (configuration.structure != "auto") @@ -365,7 +368,7 @@ StoragePtr 
TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 04dddca7672..fb311c74657 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -31,30 +32,30 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( columns = structure_hint; } - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); + /*comment=*/ String{}, + /*format_settings=*/ std::nullopt, /// No format_settings + /*distributed_processing=*/ true, + /*partition_by=*/ nullptr); } else { storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, From de3d95a7f05156330dc3cbad3dee7265a027b074 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 11 May 2024 21:00:08 +0000 Subject: [PATCH 052/439] fix style check --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a04d7f54884..1776163688e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,8 +117,8 @@ class IColumn; M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + 
M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ From 9ef86e948ecb1408d6ce8df2e2602584d591252c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:07:28 +0000 Subject: [PATCH 053/439] fix tests --- src/Backups/BackupIO_AzureBlobStorage.cpp | 1 - src/Backups/BackupIO_AzureBlobStorage.h | 5 +---- .../registerBackupEngineAzureBlobStorage.cpp | 3 +-- .../AzureBlobStorageCommon.cpp | 4 ---- .../AzureBlobStorage/AzureBlobStorageCommon.h | 6 +----- src/Storages/StorageAzureBlob.cpp | 19 +++++++------------ src/Storages/StorageAzureBlob.h | 3 --- .../TableFunctionAzureBlobStorageCluster.cpp | 4 ---- .../test_storage_azure_blob_storage/test.py | 2 +- 9 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 3f60ed5c0b4..6ae67ad5dfc 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 0829c3258c9..8f0a6e8fb5d 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,13 +1,10 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 6974d16e2f6..98920d80662 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -6,7 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index 76054efff19..a39cc89b93b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,8 +1,4 @@ -#include -#include -#include #include -#include #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 7e716adf4d0..5f9f280ad4a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -1,8 +1,4 @@ #pragma once - -#include -#include -#include "base/strong_typedef.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -15,10 +11,10 @@ #include #include -#include #include #include #include +#include namespace DB { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 2341b8dc94e..a82de72af6d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ 
b/src/Storages/StorageAzureBlob.cpp @@ -1,6 +1,4 @@ -#include #include -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -44,6 +42,7 @@ #include #include +#include #include @@ -1307,8 +1306,7 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1319,8 +1317,7 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1331,12 +1328,11 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); Strings sources; sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); + std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem) { return endpoint / elem.relative_path; }); auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } @@ -1376,9 +1372,8 @@ namespace return std::nullopt; }; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); - String source = host_and_bucket + '/' + it->relative_path; + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); + String source = endpoint / it->relative_path; if (format) { diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 936f32c3cb0..396934b4212 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,7 +1,4 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" -#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index fb311c74657..d72735bb47b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,11 +6,8 @@ #include 
#include #include - #include "registerTableFunctions.h" -#include - namespace DB { diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 78aaf26a2a7..c3204808d6f 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -789,7 +789,7 @@ def test_read_subcolumns(cluster): def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( - f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', " + f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" From d7de2ae0c9c37e9079ec575dc0a7ffeae5394206 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:31:13 +0000 Subject: [PATCH 054/439] remove optimization for old analyzer --- .../RewriteAggregateFunctionWithIfPass.cpp | 26 +-- .../RewriteFunctionToSubcolumnVisitor.cpp | 216 ------------------ .../RewriteFunctionToSubcolumnVisitor.h | 52 ----- src/Interpreters/TreeOptimizer.cpp | 61 ----- .../01872_functions_to_subcolumns.reference | 47 ---- .../01872_functions_to_subcolumns.sql | 41 ---- .../0_stateless/02115_map_contains.reference | 4 - .../0_stateless/02115_map_contains.sql | 12 - .../0_stateless/02116_tuple_element.reference | 25 -- .../0_stateless/02116_tuple_element.sql | 42 ---- ...tions_to_subcolumns_column_names.reference | 5 - ...1_functions_to_subcolumns_column_names.sql | 6 - ...2971_functions_to_subcolumns_map.reference | 12 - .../02971_functions_to_subcolumns_map.sql | 12 - ..._functions_to_subcolumns_variant.reference | 4 - .../02971_functions_to_subcolumns_variant.sql | 6 - .../03003_functions_to_subcolumns_final.sql | 1 + 17 files changed, 2 insertions(+), 570 deletions(-) delete mode 100644 src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp delete mode 100644 src/Interpreters/RewriteFunctionToSubcolumnVisitor.h delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.reference delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.sql delete mode 100644 tests/queries/0_stateless/02115_map_contains.reference delete mode 100644 tests/queries/0_stateless/02115_map_contains.sql delete mode 100644 tests/queries/0_stateless/02116_tuple_element.reference delete mode 100644 tests/queries/0_stateless/02116_tuple_element.sql diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 365bc28431a..58045c935aa 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -12,10 +12,7 @@ #include #include -<<<<<<< HEAD -======= #include ->>>>>>> upstream/master #include namespace DB @@ -102,16 +99,8 @@ public: FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns())); new_arguments[1] = std::move(not_function); -<<<<<<< HEAD - function_arguments_nodes.resize(2); - function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); - function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf(*function_node); -======= function_arguments_nodes = 
std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); ->>>>>>> upstream/master + resolveAsAggregateFunctionWithIf(*function_node); } } } @@ -120,21 +109,8 @@ private: static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); -<<<<<<< HEAD const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); -======= - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node.getFunctionName() + "If", - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); ->>>>>>> upstream/master } }; diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp deleted file mode 100644 index b1c79d4ecb7..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ /dev/null @@ -1,216 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - return std::make_shared(Nested::concatenateName(name_in_storage, subcolumn_name)); -} - -ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("equals", ast, std::make_shared(0u)); -} - -ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("notEquals", ast, std::make_shared(0u)); -} - -ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("not", ast); -} - -ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("sum", makeASTFunction("not", ast)); -} - -const std::unordered_map, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn = -{ - {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}}, - {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}}, - {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}}, - {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}}, - {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}}, -}; - -std::optional getColumnFromArgumentsToOptimize( - const ASTs & arguments, - const StorageMetadataPtr & metadata_snapshot) -{ - if (arguments.empty() || arguments.size() > 2) - return {}; - - const auto * identifier = arguments[0]->as(); - if (!identifier) - return {}; - - const auto & columns = 
metadata_snapshot->getColumns(); - const auto & name_in_storage = identifier->name(); - - if (!columns.has(name_in_storage)) - return {}; - - const auto & column_type = columns.get(name_in_storage).type; - if (column_type->hasDynamicSubcolumns()) - return {}; - - return NameAndTypePair{name_in_storage, column_type}; -} - -} - -void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data) -{ - if (const auto * identifier = ast->as()) - { - ++data.indentifiers_count[identifier->name()]; - return; - } - - if (const auto * function = ast->as()) - { - visit(*function, data); - return; - } -} - -void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data) -{ - const auto & arguments = function.arguments->children; - auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot); - if (!column) - return; - - auto column_type_id = column->type->getTypeId(); - - if (arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it == unary_function_to_subcolumn.end()) - return; - - const auto & expected_types_id = std::get<0>(it->second); - if (expected_types_id.contains(column_type_id)) - ++data.optimized_identifiers_count[column->name]; - } - else if (arguments.size() == 2) - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) - ++data.optimized_identifiers_count[column->name]; - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const auto * literal = arguments[1]->as(); - if (literal && literal->value.getType() == Field::Types::String) - ++data.optimized_identifiers_count[column->name]; - } - else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) - { - ++data.optimized_identifiers_count[column->name]; - } - } -} - -void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const -{ - const auto & arguments = function.arguments->children; - auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot); - if (!column) - return; - - auto column_type_id = column->type->getTypeId(); - auto alias = function.getAliasOrColumnName(); - - if (arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it == unary_function_to_subcolumn.end()) - return; - - const auto & [expected_types_id, subcolumn_name, transformer] = it->second; - if (!expected_types_id.contains(column_type_id)) - return; - - ast = transformer(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (arguments.size() == 2) - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64) - { - const auto & type_tuple = assert_cast(*column->type); - auto index = literal->value.get(); - subcolumn_name = type_tuple.getNameByPosition(index); - } - else if (value_type == Field::Types::String) - { - subcolumn_name = literal->value.get(); - } - else - { - return; - } - - ast = transformToSubcolumn(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const 
auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type != Field::Types::String) - return; - - subcolumn_name = literal->value.get(); - ast = transformToSubcolumn(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) - { - auto subcolumn = transformToSubcolumn(column->name, "keys"); - ast = makeASTFunction("has", subcolumn, arguments[1]); - ast->setAlias(alias); - } - } -} - -} diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h deleted file mode 100644 index 08eb6e27c52..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTFunction; -class ASTIdentifier; - -/// Collects info about identifiers to select columns to optimize to subcolumns. -class RewriteFunctionToSubcolumnFirstPassMatcher -{ -public: - struct Data - { - explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {} - - StorageMetadataPtr metadata_snapshot; - std::unordered_map indentifiers_count; - std::unordered_map optimized_identifiers_count; - }; - - static void visit(const ASTPtr & ast, Data & data); - static void visit(const ASTFunction & function, Data & data); - static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; } -}; - -using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor; - -/// Rewrites functions to subcolumns, if possible, to reduce amount of read data. -/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnSecondPassData -{ -public: - using TypeToVisit = ASTFunction; - void visit(ASTFunction & function, ASTPtr & ast) const; - - RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_) - : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) - { - } - - StorageMetadataPtr metadata_snapshot; - NameSet identifiers_to_optimize; -}; - -using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor; - -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index d01a922bfd0..b88d75cd5a2 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -564,63 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) -{ - if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot) - return; - - const auto & metadata_snapshot = result.storage_snapshot->metadata; - const auto & select_query = assert_cast(*query); - - /// For queries with FINAL converting function to subcolumn may alter - /// special merging algorithms and produce wrong result of query. 
- if (select_query.final()) - return; - - NameSet all_key_columns; - - const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); - all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end()); - - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end()); - - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - const auto & index_columns = index.expression->getRequiredColumns(); - all_key_columns.insert(index_columns.begin(), index_columns.end()); - } - - /// Do not optimize if full column is requested in other context. - /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. - - RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); - RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); - - NameSet identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - - if (identifiers_to_optimize.empty()) - return; - - RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); - RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -685,9 +627,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns) - optimizeFunctionsToSubcolumns(query, result); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference deleted file mode 100644 index 8c4017d6030..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ /dev/null @@ -1,47 +0,0 @@ -0 0 1 -0 1 0 -SELECT - id IS NULL, - `n.null` AS `isNull(n)`, - NOT `n.null` AS `isNotNull(n)` -FROM t_func_to_subcolumns -3 0 1 0 -0 1 0 \N -SELECT - `arr.size0` AS `length(arr)`, - `arr.size0` = 0 AS `empty(arr)`, - `arr.size0` != 0 AS `notEmpty(arr)`, - empty(n) -FROM t_func_to_subcolumns -['foo','bar'] [1,2] -[] [] -SELECT - `m.keys` AS `mapKeys(m)`, - `m.values` AS `mapValues(m)` -FROM t_func_to_subcolumns -1 -SELECT sum(NOT `n.null`) AS `count(n)` -FROM t_func_to_subcolumns -2 -SELECT count(id) -FROM t_func_to_subcolumns -1 0 0 -2 1 0 -3 0 0 -SELECT - id, - `n.null` AS `isNull(n)`, - right.n IS NULL -FROM t_func_to_subcolumns AS left -ALL FULL OUTER JOIN -( 
- SELECT - 1 AS id, - \'qqq\' AS n - UNION ALL - SELECT - 3 AS id, - \'www\' -) AS right USING (id) -0 10 -0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql deleted file mode 100644 index 45f83bf20e5..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ /dev/null @@ -1,41 +0,0 @@ -DROP TABLE IF EXISTS t_func_to_subcolumns; - -SET optimize_functions_to_subcolumns = 1; - -CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) -ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); - -SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; - -SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; - -SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; - -SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns; - -SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns; - -SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -DROP TABLE t_func_to_subcolumns; - -DROP TABLE IF EXISTS t_tuple_null; - -CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_tuple_null VALUES ((10)), ((20)); - -SELECT t IS NULL, t.null FROM t_tuple_null; - -DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference deleted file mode 100644 index e4ae4f951ba..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ /dev/null @@ -1,4 +0,0 @@ -SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` -FROM t_map_contains -1 -0 diff --git a/tests/queries/0_stateless/02115_map_contains.sql b/tests/queries/0_stateless/02115_map_contains.sql deleted file mode 100644 index 3c7f21cb4f1..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS t_map_contains; - -CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; - -INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); - -SET optimize_functions_to_subcolumns = 1; - -EXPLAIN SYNTAX SELECT mapContains(m, 'a') FROM t_map_contains; -SELECT mapContains(m, 'a') FROM t_map_contains; - -DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference deleted file mode 100644 index a8004f5e74c..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ /dev/null @@ -1,25 +0,0 @@ -1 -SELECT `t1.a` AS `tupleElement(t1, 1)` -FROM t_tuple_element -a -SELECT `t1.s` AS `tupleElement(t1, 2)` -FROM t_tuple_element -1 -SELECT `t1.a` AS `tupleElement(t1, \'a\')` -FROM 
t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -1 2 -WITH (1, 2) AS t -SELECT - t.1, - t.2 -1 2 -WITH CAST(\'(1, 2)\', \'Tuple(a UInt32, b UInt32)\') AS t -SELECT - t.1, - tupleElement(t, \'b\') diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql deleted file mode 100644 index e3a5134f2b2..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS t_tuple_element; - -CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; -INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); - -SET optimize_functions_to_subcolumns = 1; - -SELECT t1.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t1.1 FROM t_tuple_element; - -SELECT tupleElement(t1, 2) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; - -SELECT tupleElement(t1, 'a') FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; - -SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -SELECT t2.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; - -SELECT tupleElement(t2, 1) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; - -SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -DROP TABLE t_tuple_element; - -WITH (1, 2) AS t SELECT t.1, t.2; -EXPLAIN SYNTAX WITH (1, 2) AS t SELECT t.1, t.2; - -WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); -EXPLAIN SYNTAX WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 03c16267db1..3389ea44074 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -1,8 +1,3 @@ -SELECT - `arr.size0` AS `length(arr)`, - `n.null` AS `isNull(n)` -FROM t_column_names -{"length(arr)":"3","isNull(n)":0} SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` diff --git 
a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index b867148c8ca..48e5232d18b 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -4,12 +4,6 @@ CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Mem INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 50f21842ac1..9488291c8ff 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -1,15 +1,3 @@ -SELECT `m.size0` AS `length(m)` -FROM t_func_to_subcolumns_map -2 -1 -SELECT `m.size0` = 0 AS `empty(m)` -FROM t_func_to_subcolumns_map -0 -0 -SELECT `m.size0` != 0 AS `notEmpty(m)` -FROM t_func_to_subcolumns_map -1 -1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index c574e1033c0..e8a752a82d5 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -4,18 +4,6 @@ CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; -SELECT length(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; -SELECT empty(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; -SELECT notEmpty(m) FROM t_func_to_subcolumns_map; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference index 7a52155fc2d..04616738a15 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -1,7 +1,3 @@ -SELECT `v.String` AS `variantElement(v, \'String\')` -FROM t_func_to_subcolumns_variant -foo -\N SELECT __table1.`v.String` AS `variantElement(v, \'String\')` FROM default.t_func_to_subcolumns_variant AS __table1 foo diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql index 1cedd877289..511bcc44514 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -6,12 +6,6 @@ CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) INSERT INTO 
t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; -SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql index 5975347ad09..3fe29139c5f 100644 --- a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS t_length_1; DROP TABLE IF EXISTS t_length_2; +SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; SET optimize_on_insert = 0; From a8f0cfe580291c8612c43e5396693fe9cd4d463e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 13 May 2024 19:54:29 +0200 Subject: [PATCH 055/439] Fixes --- .../DataLakes/DeltaLakeMetadataParser.cpp | 76 ++++++++++++------- .../DataLakes/DeltaLakeMetadataParser.h | 3 + src/Storages/DataLakes/HudiMetadataParser.h | 3 + src/Storages/DataLakes/IStorageDataLake.h | 40 ++++++++-- src/Storages/StorageS3.cpp | 12 ++- src/Storages/StorageS3.h | 6 ++ tests/integration/test_storage_delta/test.py | 16 ++-- 7 files changed, 116 insertions(+), 40 deletions(-) diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 50b6ca83cb4..30a617f3460 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } template @@ -184,6 +185,10 @@ struct DeltaLakeMetadataParser::Impl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + if (object->has("add")) { auto add_object = object->get("add").extract(); @@ -218,34 +223,49 @@ struct DeltaLakeMetadataParser::Impl auto path = object->get("remove").extract()->getValue("path"); result.erase(fs::path(configuration.getPath()) / path); } - if (file_schema.empty()) + if (object->has("metaData")) { - // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - // object->stringify(oss); - // LOG_TEST(log, "Metadata: {}", oss.str()); + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); - if (object->has("metaData")) + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + NamesAndTypesList current_schema; + for (size_t i = 0; i < fields->size(); ++i) { - const auto metadata_object = object->get("metaData").extract(); - const auto schema_object = metadata_object->getValue("schemaString"); + const auto field = fields->getObject(static_cast(i)); + auto name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); - Poco::JSON::Parser p; - Poco::Dynamic::Var fields_json = parser.parse(schema_object); - 
Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = name; - const auto fields = fields_object->get("fields").extract(); - for (size_t i = 0; i < fields->size(); ++i) - { - const auto field = fields->getObject(static_cast(i)); - auto name = field->getValue("name"); - auto type = field->getValue("type"); - auto is_nullable = field->getValue("nullable"); + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + name, type, is_nullable, physical_name); - file_schema.push_back({name, getFieldType(field, "type", is_nullable)}); - } + current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); } } - /// TODO: Check if schema in each file is the same? } } @@ -278,12 +298,20 @@ struct DeltaLakeMetadataParser::Impl return value; else if (which.isInt8()) return parse(value); + else if (which.isUInt8()) + return parse(value); else if (which.isInt16()) return parse(value); + else if (which.isUInt16()) + return parse(value); else if (which.isInt32()) return parse(value); + else if (which.isUInt32()) + return parse(value); else if (which.isInt64()) return parse(value); + else if (which.isUInt64()) + return parse(value); else if (which.isFloat32()) return parse(value); else if (which.isFloat64()) @@ -299,14 +327,6 @@ struct DeltaLakeMetadataParser::Impl readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); return time; } - // else if (which.isDecimal32()) - // return parse(value); - // else if (which.isDecimal64()) - // return parse(value); - // else if (which.isDecimal128()) - // return parse(value); - // else if (which.isDecimal256()) - // return parse(value); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index 58cf7acd2a3..701f15a8a9f 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -20,10 +20,13 @@ public: DataLakePartitionColumns getPartitionColumns() const { return partition_columns; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const { return column_name_to_physical_name; } + private: struct Impl; std::shared_ptr impl; NamesAndTypesList schema; + std::unordered_map column_name_to_physical_name; DataLakePartitionColumns partition_columns; Strings data_files; }; diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 9e2901a9d24..3f8631dba77 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -20,10 +20,13 @@ public: DataLakePartitionColumns getPartitionColumns() const { return {}; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const { return column_name_to_physical_name; } + private: struct Impl; std::shared_ptr impl; Strings data_files; + 
std::unordered_map column_name_to_physical_name; }; } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index be23c017043..b0c5af1a6e4 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "PartitionColumns.h" #include @@ -32,11 +33,13 @@ public: Args && ...args) { std::unique_ptr metadata; - Configuration read_configuration; + Configuration base_configuration{configuration_}; + base_configuration.update(context_); + Configuration read_configuration{base_configuration}; + try { - base_configuration.update(context_); metadata = std::make_unique(base_configuration, context_); read_configuration = getConfigurationForDataRead(*metadata, base_configuration, context_); } @@ -124,23 +127,48 @@ private: return configuration; } + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!metadata) + { + base_configuration.update(local_context); + metadata = std::make_unique(base_configuration, local_context); + } + auto column_mapping = metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } + void updateConfigurationImpl(const ContextPtr & local_context) { const bool updated = base_configuration.update(local_context); - auto metadata = MetadataParser(base_configuration, local_context); - auto new_keys = metadata.getFiles(); - Storage::partition_columns = metadata.getPartitionColumns(); + metadata = std::make_unique(base_configuration, local_context); + auto new_keys = metadata->getFiles(); + Storage::partition_columns = metadata->getPartitionColumns(); if (!updated && new_keys == Storage::getConfiguration().keys) return; - auto read_configuration = getConfigurationForDataRead(metadata, base_configuration, local_context); + auto read_configuration = getConfigurationForDataRead(*metadata, base_configuration, local_context); Storage::useConfiguration(read_configuration); } Configuration base_configuration; std::mutex configuration_update_mutex; + std::unique_ptr metadata; LoggerPtr log; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 205b5d3381e..fc05c40c38f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1210,6 +1210,15 @@ bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); } +ReadFromFormatInfo StorageS3::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageS3::read( QueryPlan & query_plan, const Names & column_names, @@ -1220,7 +1229,8 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + auto 
read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 9e1d9eb5aad..535c3a97a68 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -395,6 +395,12 @@ private: bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; + + virtual ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context); }; } diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index c6bb8fd8d69..4cb71895881 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -153,7 +153,7 @@ def test_single_log_file(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_single_log_file" - inserted_data = "SELECT number, toString(number + 1) FROM numbers(100)" + inserted_data = "SELECT number as a, toString(number + 1) as b FROM numbers(100)" parquet_data_path = create_initial_data_file( started_cluster, instance, inserted_data, TABLE_NAME ) @@ -520,7 +520,7 @@ def test_partition_columns(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_partition_columns" result_file = f"{TABLE_NAME}" - partition_column = "c" + partition_columns = ["b", "c", "d", "e"] delta_table = ( DeltaTable.create(spark) @@ -529,7 +529,9 @@ def test_partition_columns(started_cluster): .addColumn("a", "INT") .addColumn("b", "STRING") .addColumn("c", "DATE") - .partitionedBy(partition_column) + .addColumn("d", "INT") + .addColumn("e", "BOOLEAN") + .partitionedBy(partition_columns) .execute() ) num_rows = 9 @@ -539,6 +541,8 @@ def test_partition_columns(started_cluster): StructField("a", IntegerType()), StructField("b", StringType()), StructField("c", DateType()), + StructField("d", IntegerType()), + StructField("e", BooleanType()), ] ) @@ -548,11 +552,13 @@ def test_partition_columns(started_cluster): i, "test" + str(i), datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + i, + False, ) ] df = spark.createDataFrame(data=data, schema=schema) df.printSchema() - df.write.mode("append").format("delta").partitionBy(partition_column).save( + df.write.mode("append").format("delta").partitionBy(partition_columns).save( f"/{TABLE_NAME}" ) @@ -569,7 +575,7 @@ def test_partition_columns(started_cluster): assert ( result - == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date)" + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date32)\t\t\t\t\t\nd\tNullable(Int32)\t\t\t\t\t\ne\tNullable(Bool)" ) result = int( From 35038f2458f2642700ee685a5c4f43729228d8a0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:23:24 +0000 Subject: [PATCH 056/439] rename parameter --- .../test_backup_restore_azure_blob_storage/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 1a1458cb68e..6765a519a6d 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ 
b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 0a1daa00e0f8b57f73e64a7f9794ce1e7cef2c86 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:40:38 +0000 Subject: [PATCH 057/439] fix functions with if --- .../Passes/RewriteAggregateFunctionWithIfPass.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 58045c935aa..45f3469b48e 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -71,7 +71,7 @@ public: new_arguments[1] = std::move(if_arguments_nodes[0]); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } else if (first_const_node) @@ -100,18 +100,10 @@ public: new_arguments[1] = std::move(not_function); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } } - -private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) - { - auto result_type = function_node.getResultType(); - const auto * suffix = result_type->isNullable() ? 
"OrNullIf" : "If"; - resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); - } }; } From 29250418cb5d666b81043695a1df09e4df94e539 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 17 May 2024 20:43:40 +0000 Subject: [PATCH 058/439] fix backward incompatibility --- .../AzureBlobStorage/AzureBlobStorageCommon.cpp | 15 +++++++++++++-- .../test.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a39cc89b93b..11253d25e3d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -92,6 +92,17 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String container_name; String prefix; + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -154,13 +165,13 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const else if (config.has(config_prefix + ".connection_string")) { storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else if (config.has(config_prefix + ".storage_account_url")) { storage_url = config.getString(config_prefix + ".storage_account_url"); validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 6765a519a6d..1a1458cb68e 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 47c7b7fccf38f9ae3180e1a456262388bd39e129 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 16:01:48 +0100 Subject: [PATCH 059/439] add tests for non replicated mt --- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 18 ++- .../Transforms/buildPushingToViewsChain.cpp | 6 + src/Storages/MergeTree/MergeTreeDataWriter.h | 2 + src/Storages/MergeTree/MergeTreeSink.cpp | 26 ++++ .../MergeTree/MergedBlockOutputStream.cpp | 5 + .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- ..._non_replicated_deduplication_mv.reference | 0 
.../03008_non_replicated_deduplication_mv.sql | 93 ++++++++++++++ ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 +++++++++++++++++ ...lision_in_dst_from_different_src.reverence | 0 ...mv_collision_in_dst_from_different_src.sql | 119 ++++++++++++++++++ ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 +++++++++++ 15 files changed, 456 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c555b5cb208..491e888e3e0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -34,7 +34,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 12677c422b8..d1a9ead480e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -306,6 +306,9 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { + LOG_DEBUG(getLogger("InsertQuery"), + "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -465,16 +468,17 @@ BlockIO InterpreterInsertQuery::execute() * to avoid unnecessary squashing. 
*/ + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + Settings new_settings = getContext()->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); if (table->prefersLargeBlocks()) { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); } auto new_context = Context::createCopy(context); @@ -527,6 +531,7 @@ BlockIO InterpreterInsertQuery::execute() /// Deduplication when passing insert_deduplication_token breaks if using more than one thread if (!settings.insert_deduplication_token.toString().empty()) { + /// TODO! LOG_DEBUG( getLogger("InsertQuery"), "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); @@ -566,8 +571,13 @@ BlockIO InterpreterInsertQuery::execute() running_group = std::make_shared(getContext()); for (size_t i = 0; i < sink_streams_size; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); } for (size_t i = 0; i < pre_streams_size; ++i) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..66264a46d9d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -223,6 +223,8 @@ std::optional generateViewChain( else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && !insert_settings.insert_deduplication_token.value.empty()) { + + /// TODO! /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle * deduplication in complex INSERT flows. * @@ -252,6 +254,8 @@ std::optional generateViewChain( else insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); } @@ -483,6 +487,8 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { + LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + try { auto out = generateViewChain( diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 863c951d957..3e47e3705b9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,6 +47,8 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { + LOG_WARNING(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); + } /** Split the block to blocks, each of them must be written as separate part. 
diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..f0eb56aea13 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -40,6 +40,8 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { + LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", + storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -56,6 +58,10 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk chunk) { + LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", + num_blocks_processed, + chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); + if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -65,6 +71,8 @@ void MergeTreeSink::consume(Chunk chunk) auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); + using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -121,8 +129,16 @@ void MergeTreeSink::consume(Chunk chunk) else max_insert_delayed_streams_for_parallel_write = 0; + LOG_INFO(storage.log, "consume() called for {}.{} " + "streams {} + {} -> {}, " + "max {} support_parallel_write {}", + storage.getStorageID().database_name, storage.getStorageID().getTableName(), + streams, temp_part.streams.size(), streams + temp_part.streams.size(), + max_insert_delayed_streams_for_parallel_write, support_parallel_write); + /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); + if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -156,8 +172,12 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); + for (auto & partition : delayed_chunk->partitions) { + LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); + ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -174,9 +194,15 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); + + LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); + + LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); + auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index d8555d69788..12bc284f68c 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -50,6 +50,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->storeVersionMetadata(); writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. 
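For reference, a standalone sketch of the deduplication-log check that the MergeTreeSink logging above traces: a block id derived from the part (and the optional dedup token) is added to a log, and when the insertion reports that the id was already present the new part is dropped as a duplicate. The helper names and the use of std::hash are assumptions for illustration; the real code derives the id via getZeroLevelPartBlockID and a persistent deduplication log.

#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

// Stand-in for getZeroLevelPartBlockID: hash the dedup token if given, else the block data.
std::string makeBlockId(const std::string & block_data, const std::string & dedup_token)
{
    const std::string & source = dedup_token.empty() ? block_data : dedup_token;
    return std::to_string(std::hash<std::string>{}(source));
}

int main()
{
    std::unordered_set<std::string> deduplication_log;   // stands in for the on-disk dedup log

    for (const std::string & block : {"block_a", "block_a", "block_b"})
    {
        auto res = deduplication_log.insert(makeBlockId(block, /*dedup_token=*/""));
        if (!res.second)
            std::cout << "duplicate block id, part skipped: " << block << '\n';
        else
            std::cout << "new block id, part committed: " << block << '\n';
    }
    return 0;
}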
@@ -329,6 +331,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + block.rows(), block.bytes(), data_part_storage->getPartDirectory()); + block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..2bb9aad1e53 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -535,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir]() + auto try_rollback_part_rename = [this, &part, &original_part_dir] () { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql new file mode 100644 index 00000000000..8f718508ee8 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql @@ -0,0 +1,93 @@ +DROP TABLE IF EXISTS table_a_b; +DROP TABLE IF EXISTS table_when_b_even; +DROP TABLE IF EXISTS mv_b_even; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=3; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +CREATE TABLE table_a_b + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup +TO table_when_b_even_wo_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + +CREATE TABLE table_when_b_even_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_dedup +TO table_when_b_even_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; + 
+SELECT 'table_when_b_even_dedup'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql new file mode 100644 index 00000000000..46b9bd52144 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql @@ -0,0 +1,113 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_for_join_with + ( + a_join String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_join, b); + +INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(10); +SELECT 'table_for_join_with'; +SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + + +CREATE TABLE table_a_b + ( + a_src String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_src, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_when_b_even_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_when_b_even_wo_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM 
table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE table_when_b_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE table_when_b_even_wo_dedup; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql new file mode 100644 index 00000000000..02546af69dc --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql @@ -0,0 +1,119 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_source + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_source; + +CREATE TABLE table_dst_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_dst_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + +CREATE TABLE table_dst_wo_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_dst_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 
'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE mv_b_even_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE mv_b_even_even_wo_dedup; +DROP TABLE table_dst_dedup; +DROP TABLE table_dst_wo_dedup; +DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql new file mode 100644 index 00000000000..213b449dd73 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_a_b + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even + ( + a String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; From 8b7563040c39dc11647606cdd9f8d77dc6e4cc84 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 19:06:42 +0100 Subject: [PATCH 060/439] non replicated inserts with deduplication user token --- src/Interpreters/InterpreterInsertQuery.cpp | 32 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.h | 3 +- .../MergeTree/MergedBlockOutputStream.cpp | 4 +- .../0_stateless/03008_deduplication.python | 561 +++++++++++ ...uplication_insert_several_blocks.reference | 870 ++++++++++++++++++ ...008_deduplication_insert_several_blocks.sh | 92 ++ ...tion_mv_generates_several_blocks.reference | 814 ++++++++++++++++ ...duplication_mv_generates_several_blocks.sh | 98 ++ 
...cation_several_mv_into_one_table.reference | 590 ++++++++++++ ...deduplication_several_mv_into_one_table.sh | 106 +++ ..._non_replicated_deduplication_mv.reference | 0 .../03008_non_replicated_deduplication_mv.sql | 93 -- ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 --- ...lision_in_dst_from_different_src.reverence | 0 ...mv_collision_in_dst_from_different_src.sql | 119 --- ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 -- 19 files changed, 3151 insertions(+), 422 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication.python create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d1a9ead480e..a5396be9b76 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -519,29 +519,29 @@ BlockIO InterpreterInsertQuery::execute() if (settings.max_insert_threads > 1) { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); + pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. - const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); - pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads - : std::min(settings.max_insert_threads, pipeline.getNumStreams()); - /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - if (!settings.insert_deduplication_token.toString().empty()) - { - /// TODO! 
- LOG_DEBUG( - getLogger("InsertQuery"), - "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); - pre_streams_size = 1; - } +// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread +// if (!settings.insert_deduplication_token.toString().empty()) +// { +// /// TODO! +// LOG_DEBUG( +// getLogger("InsertQuery"), +// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", +// pre_streams_size, pipeline.getNumStreams()); +// pre_streams_size = 1; +// } if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; } + LOG_DEBUG( + getLogger("InsertQuery"), + "pre_streams_size {}, pipeline.getNumStreams() {}", + pre_streams_size, pipeline.getNumStreams()); + pipeline.resize(pre_streams_size); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 66264a46d9d..70f30faa5b1 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -487,7 +487,7 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); try { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 3e47e3705b9..a9a44813545 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,8 +47,7 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_WARNING(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); - + LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 12bc284f68c..fd2b05f615e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -51,7 +51,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. 
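For reference, a simplified sketch of the stream-sizing change above: the old code capped the resize with std::min over the pipeline streams when views were involved (and fell back to a single stream when insert_deduplication_token was set), while the patched code widens the pipeline with std::max up to max_insert_threads. Variable names are illustrative; the real decision also consults dependent views and table properties.

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    size_t max_insert_threads = 4;      // settings.max_insert_threads
    size_t pipeline_streams   = 2;      // pipeline.getNumStreams()
    bool   supports_parallel_insert = true;

    size_t old_pre_streams = std::min(max_insert_threads, pipeline_streams);  // previous behaviour
    size_t new_pre_streams = std::max(max_insert_threads, pipeline_streams);  // behaviour after this patch
    size_t sink_streams    = supports_parallel_insert ? new_pre_streams : 1;

    std::cout << "old pre_streams_size=" << old_pre_streams
              << " new pre_streams_size=" << new_pre_streams
              << " sink_streams_size=" << sink_streams << '\n';
    return 0;
}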
@@ -331,7 +331,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", block.rows(), block.bytes(), data_part_storage->getPartDirectory()); block.checkNumberOfRows(); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python new file mode 100644 index 00000000000..3cd29247910 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -0,0 +1,561 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import string + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + + +def __format(template, **params): + field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] + kv_args = {} + for field in field_names: + if field in params: + kv_args[field] = params[field] + else: + kv_args[field] = "" + + return template.format(**kv_args) + + +def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): + template = """ + CREATE TABLE {table_name} + {table_columns} + ENGINE = {table_engine} + ORDER BY {table_keys} + {table_settings}; + {table_no_merges} + """ + + params = dict() + params["table_name"] = table_name + params["table_columns"] = table_columns + params["table_keys"] = table_keys + params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" + params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + + deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_value = 1000 if with_deduplication else 0 + + settings = list() + settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + params["table_settings"] = "SETTINGS " + ",".join(settings) + + return __format(template, **params) + + +def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", + ) + + +def get_drop_tables_statements(tables): + return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + + +def get_logs_statement(args): + if args.get_logs: + return "SET send_logs_level='test';" + return "" + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +class ArgsFactory: + def __init__(self, parser): + self.__parser = parser + + def add_opt_engine(self): + self.__parser.add_argument( + "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], 
default="MergeTree") + + def add_opt_user_token(self): + self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_single_thread(self): + self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_src(self): + self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_dst(self): + self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_get_logs(self): + self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_uniq_blocks(self): + self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + + def add_all(self): + self.add_opt_engine() + self.add_opt_user_token() + self.add_opt_single_thread() + self.add_opt_dedup_src() + self.add_opt_dedup_dst() + self.add_opt_get_logs() + self.add_opt_uniq_blocks() + + +def test_insert_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_statement = instance_create_statement( + table_name="table_when_b_even", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + create_mv_statement = """ + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + """ + + drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + + insert_statement = instance_insert_statement( + "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + ) + + print_details_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} + + SELECT 'table_when_b_even'; + SELECT 'count', count() FROM table_when_b_even; + {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} + """ + + + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 5 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = """ + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 10 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + 
assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_a_b_statement} + + {create_table_when_b_even_statement} + + {create_mv_statement} + + -- first insert + {insert_statement} + + {print_details_statements} + + {assert_first_insert_statements} + + -- second insert, it is retry + {insert_statement} + + {print_details_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_mv_generates_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + details_print_for_table_for_join_with = "" + if args.get_logs: + details_print_for_table_for_join_with = """ + SELECT 'table_for_join_with'; + SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + """ + + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a_src String, b UInt64)", + table_keys="(a_src, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_and_joined_statement = instance_create_statement( + table_name="table_when_b_even_and_joined", + table_columns="(a_src String, a_join String, b UInt64)", + table_keys="(a_src, a_join, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + + SELECT 'table_when_b_even_and_joined'; + SELECT 'count', count() FROM table_when_b_even_and_joined; + {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 5 ) + FROM table_a_b; + + SELECT throwIf( count() != 47 ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + FROM table_when_b_even_and_joined; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + FROM table_when_b_even_and_joined; + """ + else: + assert_first_insert_statements = f""" + 
SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + FROM table_when_b_even_and_joined; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + CREATE TABLE table_for_join_with + (a_join String, b UInt64) + ENGINE = MergeTree() + ORDER BY (a_join, b); + INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(9); + {details_print_for_table_for_join_with} + + {create_table_a_b_statement} + SYSTEM STOP MERGES table_a_b; + + {create_table_when_b_even_and_joined_statement} + SYSTEM STOP MERGES table_when_b_even_and_joined; + + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even_and_joined + AS + SELECT a_src, a_join, table_for_join_with.b as b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + -- first insert + {insert_statement} + + {details_print_statements} + + -- first assertion + {assert_first_insert_statements} + + -- second insert + {insert_statement} + + {details_print_statements} + + -- second assertion + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_several_mv_into_one_table(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + create_table_src_statement = instance_create_statement( + table_name="table_src", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_dst_statement = instance_create_statement( + table_name="table_dst", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_src", 8, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_src count', count() FROM table_src; + + SELECT 'table_dst count', count() FROM table_dst; + {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 8 ) + FROM table_src; + + SELECT throwIf( count() != 6 ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) + FROM table_dst; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT 
throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_src_statement} + + {create_table_dst_statement} + + CREATE MATERIALIZED VIEW mv_b_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 2 = 0; + + CREATE MATERIALIZED VIEW mv_b_even_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 4 = 0; + + -- first insert + {insert_statement} + + {details_print_statements} + + {assert_first_insert_statements} + + -- second insert, retry + {insert_statement} + + {details_print_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="test") + test_insert_several_blocks( + subparsers.add_parser("insert_several_blocks_into_table") + ) + test_mv_generates_several_blocks( + subparsers.add_parser("mv_generates_several_blocks") + ) + test_several_mv_into_one_table( + subparsers.add_parser("several_mv_into_one_table") + ) + args = parser.parse_args() + if args.test is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + if args.test is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference new file mode 100644 index 00000000000..35b2642a4d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -0,0 +1,870 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: engine=MergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh new file mode 100755 index 00000000000..5b07f6033ad --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# fails, it is a error. 
Several blocks in scr table with the same user token are processed in parallel and deduplicated + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False" +# fails, it is a error. The same situation as first one, but on dst table. + +RUN_ONLY="" +#RUN_ONLY="" + +KNOWN_ERRORS=(8 9 10 11 12 13) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference new file mode 100644 index 00000000000..eccdbd52f37 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -0,0 +1,814 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False 
+table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh new file mode 100755 index 00000000000..1dd648583c6 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# failed due to race in multi thread insertion, blocks are deduplicated in different threads + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# the same as first but for dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# dst table deduplicates all incoming blocks from one insert because not uniq hash + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 
2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference new file mode 100644 index 00000000000..12eea604e3a --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -0,0 +1,590 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 +EXPECTED_TO_FAIL + +Test case 11: 
engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 
+0 +EXPECTED_TO_FAIL + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 
+table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh new file mode 100755 index 00000000000..487b3ac5f88 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# race condition on insert into src table + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# race condition on insert into dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# dst deduplicates blocks from one inserts from different materialized view + +# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# dst deduplicates blocks from different inserts by hash + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +RUN_ONLY="" +#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + 
i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" + + diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql deleted file mode 100644 index 8f718508ee8..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql +++ /dev/null @@ -1,93 +0,0 @@ -DROP TABLE IF EXISTS table_a_b; -DROP TABLE IF EXISTS table_when_b_even; -DROP TABLE IF EXISTS mv_b_even; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=3; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -CREATE TABLE table_a_b - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup -TO table_when_b_even_wo_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - -CREATE TABLE table_when_b_even_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even_dedup -TO table_when_b_even_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; - -SELECT 'table_when_b_even_dedup'; 
-SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql deleted file mode 100644 index 46b9bd52144..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql +++ /dev/null @@ -1,113 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_for_join_with - ( - a_join String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_join, b); - -INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(10); -SELECT 'table_for_join_with'; -SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - - -CREATE TABLE table_a_b - ( - a_src String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_src, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_when_b_even_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_when_b_even_wo_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; -SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, 
count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE table_when_b_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE table_when_b_even_wo_dedup; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql deleted file mode 100644 index 02546af69dc..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql +++ /dev/null @@ -1,119 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_source - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_source; - -CREATE TABLE table_dst_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_dst_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - -CREATE TABLE table_dst_wo_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_dst_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_dedup'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 
'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE mv_b_even_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE mv_b_even_even_wo_dedup; -DROP TABLE table_dst_dedup; -DROP TABLE table_dst_wo_dedup; -DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql deleted file mode 100644 index 213b449dd73..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql +++ /dev/null @@ -1,76 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_a_b - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even - ( - a String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; From 02c9a07778cdc6295d2ebf972c52de389e7edabb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 12 Apr 2024 15:04:52 +0200 Subject: [PATCH 061/439] work in progress --- src/Common/CollectionOfDerived.h | 153 +++ src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 11 +- src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 615 +++++----- src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/SystemLog.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 62 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- 
.../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 17 +- .../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 8 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 6 +- src/Processors/Sinks/SinkToStorage.h | 37 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 2 +- .../AggregatingInOrderTransform.cpp | 11 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 4 +- src/Processors/Transforms/FilterTransform.cpp | 3 +- .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/NumberBlocksTransform.cpp | 1 + .../Transforms/NumberBlocksTransform.h | 224 ++++ .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingChunksTransform.cpp | 10 + .../Transforms/SquashingChunksTransform.h | 2 + .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 91 +- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 + src/Storages/Distributed/DistributedSink.cpp | 8 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- src/Storages/Kafka/StorageKafka.cpp | 2 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/MaterializedView/RefreshTask.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 19 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 50 +- src/Storages/MergeTree/MergeTreeSink.h | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 54 +- .../MergeTree/ReplicatedMergeTreeSink.h | 3 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 2 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- .../PostgreSQLReplicationHandler.cpp | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- src/Storages/StorageAzureBlob.cpp | 4 +- src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- 
src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 4 +- .../0_stateless/03008_deduplication.python | 140 ++- ...uplication_insert_several_blocks.reference | 1088 ++++++++++++++++- ...008_deduplication_insert_several_blocks.sh | 101 +- ...tion_mv_generates_several_blocks.reference | 1032 +++++++++++++++- ...duplication_mv_generates_several_blocks.sh | 99 +- ...cation_several_mv_into_one_table.reference | 784 +++++++++++- ...deduplication_several_mv_into_one_table.sh | 101 +- 100 files changed, 4107 insertions(+), 1004 deletions(-) create mode 100644 src/Common/CollectionOfDerived.h create mode 100644 src/Processors/Transforms/NumberBlocksTransform.cpp create mode 100644 src/Processors/Transforms/NumberBlocksTransform.h diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h new file mode 100644 index 00000000000..8579c4dd50c --- /dev/null +++ b/src/Common/CollectionOfDerived.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +template +class CollectionOfDerivedItems +{ +public: + using Self = CollectionOfDerivedItems; + using ItemPtr = std::shared_ptr; + +private: + struct Rec + { + std::type_index type_idx; + ItemPtr ptr; + + bool operator<(const Rec & other) const + { + return type_idx < other.type_idx; + } + + bool operator<(const std::type_index & value) const + { + return type_idx < value; + } + + bool operator==(const Rec & other) const + { + return type_idx == other.type_idx; + } + }; + using Records = std::vector; + +public: + void swap(Self & other) + { + records.swap(other.records); + } + + void clear() + { + records.clear(); + } + + bool empty() const + { + return records.empty(); + } + + size_t size() const + { + return records.size(); + } + + Self clone() const + { + Self result; + result.records.reserve(records.size()); + for (const auto & rec: records) + result.records.emplace_back(rec.type_idx, rec.ptr->clone()); + return result; + } + + void append(Self && other) + { + std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); + std::sort(records.begin(), records.end()); + chassert(isUniqTypes()); + } + + template + void add(std::shared_ptr info) + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + return addImpl(std::type_index(typeid(T)), std::move(info)); + } + + template + std::shared_ptr get() const + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + return cast; + } + + template + std::shared_ptr extract() + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + + records.erase(it); + return cast; + } + +private: + bool isUniqTypes() const + { + auto uniq_it = std::adjacent_find(records.begin(), records.end()); + + return uniq_it == records.end(); + } + + void addImpl(std::type_index type_idx, ItemPtr item) + { + auto it = std::lower_bound(records.begin(), records.end(), type_idx); + + if (it == records.end()) + { + records.emplace_back(type_idx, item); + return; + } + + chassert(it->type_idx != type_idx); + + 
records.emplace(it, type_idx, item); + + chassert(isUniqTypes()); + } + + Records::const_iterator getImpl(std::type_index type_idx) const + { + auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); + + if (it == records.cend()) + return records.cend(); + + if (it->type_idx != type_idx) + return records.cend(); + + return it; + } + + Records records; +}; + +} diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ab29c64184d..65035790729 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, false, false, false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -780,7 +780,7 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -999,7 +999,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } @@ -1051,7 +1051,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..e070d8694a7 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -11,6 +12,7 @@ #include #include #include +#include "Processors/Chunk.h" #include #include @@ -91,7 +93,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfo +class TableCheckTask : public ChunkInfoCloneable { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -110,6 +112,12 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } + TableCheckTask(const TableCheckTask & other) + : table(other.table) + , check_data_tasks(other.check_data_tasks) + , is_finished(other.is_finished.load()) + {} + std::optional checkNext() const { if (isFinished()) @@ -121,8 +129,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; - auto result = table->checkDataNext(check_data_tasks_); + IStorage::DataValidationTasksPtr tmp = check_data_tasks; + auto result = table->checkDataNext(tmp); is_finished = !result.has_value(); return result; } @@ -180,7 +188,7 @@ protected: /// source should return at least one row to start 
pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.setChunkInfo(std::move(current_check_task)); + result.getChunkInfos().add(std::move(current_check_task)); return result; } @@ -280,7 +288,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); + auto table_check_task = chunk.getChunkInfos().get(); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..a143ca867e1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1690,8 +1690,15 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery( + insert, + getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns, + false, + false, + false + ) + .execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..08d6ac7df9e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -524,7 +524,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a5396be9b76..40d5a84031d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -394,28 +396,323 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +{ + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + + std::vector sink_chains; + std::vector presink_chains; + + for (size_t i = 0; i < sink_streams; ++i) + { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + + sink_chains.emplace_back(std::move(out)); + } + + for (size_t i = 0; i < presink_streams; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } + + return {std::move(presink_chains), std::move(sink_chains)}; +} + + +QueryPipeline 
InterpreterInsertQuery::buildInsertSelectPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + ContextPtr select_context = getContext(); + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + + Settings new_settings = select_context->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + } + + auto context_for_trivial_select = Context::createCopy(context); + context_for_trivial_select->setSettings(new_settings); + context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + select_context = context_for_trivial_select; + } + + QueryPipelineBuilder pipeline; + + { + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
+ if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + { + query_sample_block.setColumn( + col_idx, + ColumnWithTypeAndName( + makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), + makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), + query_columns[col_idx].name)); + } + } + } + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); + size_t sink_streams_size = table->supportsParallelInsert() ? 
presink_streams_size : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.resize(1); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(in_header); + }); + } + + pipeline.resize(presink_chains.size()); + for (auto & chain : presink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(presink_chains)); + + pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + size_t num_select_threads = pipeline.getNumThreads(); + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. + pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); +} + + +QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + Chain chain; + + { + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + 1, 1, + table, metadata_snapshot, query_sample_block); + + chain = std::move(presink_chains.front()); + chain.appendChain(std::move(sink_chains.front())); + } + + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); + pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + pipeline.complete(std::move(pipe)); + } + + return pipeline; +} + + BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); - QueryPipelineBuilder pipeline; - std::optional distributed_pipeline; - QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - StoragePtr inner_table; - if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -423,303 +720,37 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) - // Distributed INSERT SELECT - distributed_pipeline = table->distributedWrite(query, getContext()); - - std::vector presink_chains; - std::vector sink_chains; - if (!distributed_pipeline) + if (!allow_materialized) { - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - /// * If it's not an INSERT SELECT, forget all that and use one stream. 
- size_t pre_streams_size = 1; - size_t sink_streams_size = 1; - - if (query.select) - { - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - - Settings new_settings = getContext()->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); - } - - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - else - { - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - if (settings.max_insert_threads > 1) - { - pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - -// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread -// if (!settings.insert_deduplication_token.toString().empty()) -// { -// /// TODO! 
-// LOG_DEBUG( -// getLogger("InsertQuery"), -// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", -// pre_streams_size, pipeline.getNumStreams()); -// pre_streams_size = 1; -// } - - if (table->supportsParallelInsert()) - sink_streams_size = pre_streams_size; - } - - LOG_DEBUG( - getLogger("InsertQuery"), - "pre_streams_size {}, pipeline.getNumStreams() {}", - pre_streams_size, pipeline.getNumStreams()); - - pipeline.resize(pre_streams_size); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) - query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); - } - } - } - } - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - for (size_t i = 0; i < sink_streams_size; ++i) - { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); - - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - for (size_t i = 0; i < pre_streams_size; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } BlockIO res; - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
- if (distributed_pipeline) + if (query.select) { - res.pipeline = std::move(*distributed_pipeline); - } - else if (query.select) - { - const auto & header = presink_chains.at(0).getInputHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.parallel_distributed_insert_select) { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + res.pipeline = *table->distributedWrite(query, getContext()); } - - size_t num_select_threads = pipeline.getNumThreads(); - - for (auto & chain : presink_chains) - resources = chain.detachResources(); - for (auto & chain : sink_chains) - resources = chain.detachResources(); - - pipeline.addChains(std::move(presink_chains)); - pipeline.resize(sink_chains.size()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) + else { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + res.pipeline = buildInsertSelectPipeline(); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
- pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); - } - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - auto & chain = presink_chains.at(0); - chain.appendChain(std::move(sink_chains.at(0))); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - res.pipeline = QueryPipeline(std::move(presink_chains[0])); - res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - res.pipeline.complete(std::move(pipe)); - } + res.pipeline = buildInsertPipeline(); } - res.pipeline.addResources(std::move(resources)); - res.pipeline.addStorageHolder(table); + + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (inner_table) res.pipeline.addStorageHolder(inner_table); @@ -742,17 +773,21 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } + void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } + void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized); + return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } + + } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bf73fb2a319..3f3b7a6f106 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_ = false, - bool no_squash_ = false, - bool no_destination_ = false, - bool async_insert_ = false); + bool allow_materialized_, + bool no_squash_, + bool 
no_destination, + bool async_insert_); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,12 +73,17 @@ private: ASTPtr query_ptr; const bool allow_materialized; - const bool no_squash; - const bool no_destination; + bool no_squash = false; + bool no_destination = false; const bool async_insert; std::vector> owned_buffers; + std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + + QueryPipeline buildInsertSelectPipeline(); + QueryPipeline buildInsertPipeline(); + Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3af8761ff8e..2d5109a612c 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -522,7 +522,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, /// We always want to deliver the data to the original table regardless of the MVs insert_context->setSetting("materialized_views_ignore_errors", true); - InterpreterInsertQuery interpreter(query_ptr, insert_context); + InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..13df2e64421 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -43,17 +35,11 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + auto tmp = Chunk(getColumns(), getNumRows()); + tmp.setChunkInfos(chunk_infos.clone()); + return tmp; } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..b4345d18a08 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,7 +1,15 @@ #pragma once +#include "base/defines.h" + +#include #include + +#include +#include +#include #include +#include namespace DB { @@ -9,11 +17,29 @@ namespace DB class ChunkInfo { public: - virtual ~ChunkInfo() = default; + using Ptr = std::shared_ptr; + ChunkInfo() = default; + ChunkInfo(const ChunkInfo&) = default; + ChunkInfo(ChunkInfo&&) = default; + + virtual Ptr clone() const = 0; + virtual ~ChunkInfo() = default; }; -using ChunkInfoPtr = std::shared_ptr; + +template +class ChunkInfoCloneable : public ChunkInfo +{ +public: + ChunkInfoCloneable() = default; + ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; + + Ptr clone() const override + { + return std::static_pointer_cast(std::make_shared(*static_cast(this))); + } +}; /** * Chunk 
is a list of columns with the same length. @@ -32,26 +58,26 @@ using ChunkInfoPtr = std::shared_ptr; class Chunk { public: + using ChunkInfoCollection = CollectionOfDerivedItems; + Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_info(std::move(other.chunk_info)) + , chunk_infos(std::move(other.chunk_infos)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_info = std::move(other.chunk_info); + chunk_infos = std::move(other.chunk_infos); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -62,15 +88,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); - chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_info.reset(); + chunk_infos.clear(); } const Columns & getColumns() const { return columns; } @@ -81,9 +107,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } - bool hasChunkInfo() const { return chunk_info != nullptr; } - void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } + ChunkInfoCollection & getChunkInfos() { return chunk_infos; } + const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } + void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -107,7 +133,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoPtr chunk_info; + ChunkInfoCollection chunk_infos; void checkNumRowsIsConsistent(); }; @@ -117,11 +143,15 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfo +class AsyncInsertInfo : public ChunkInfoCloneable { public: AsyncInsertInfo() = default; - explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + AsyncInsertInfo(const AsyncInsertInfo & other) = default; + AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) + : offsets(offsets_) + , tokens(tokens_) + {} std::vector offsets; std::vector tokens; @@ -130,9 +160,11 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfo +class ChunkMissingValues : public ChunkInfoCloneable { public: + ChunkMissingValues(const ChunkMissingValues & other) = default; + using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d27002197d2..d9fab88fe1f 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..25c15d40c9a 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9c85dab70c4..6067e2f3db3 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -180,7 +180,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + piece.emplace_back(std::move(columns), count); + piece.back().setChunkInfos(concatenated.getChunkInfos()); + writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 4136fc5a5f2..46be6e74693 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,8 +8,9 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}) + , input(inputs.front()) + , output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..f33cc267c44 100644 --- 
a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,16 +51,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & info = input.chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); + const auto & arenas_info = input.chunk.getChunkInfos().get(); + if (!arenas_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); - Int64 allocated_bytes = 0; - /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. - if (const auto * arenas_info = typeid_cast(info.get())) - allocated_bytes = arenas_info->allocated_bytes; - - states[source_num] = State{input.chunk, description, allocated_bytes}; + states[source_num] = State{input.chunk, description, arenas_info->allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() @@ -134,7 +129,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -161,7 +156,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().setChunkInfo(std::make_shared()); + chunks.back().getChunkInfos().add(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index bcf4e759024..e4f22deec8d 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,18 +6,22 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfo +class MergeTreePartLevelInfo : public ChunkInfoCloneable { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } + explicit MergeTreePartLevelInfo(ssize_t part_level) + : origin_merge_tree_part_level(part_level) + { } + MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; + size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (const auto * part_level_info = typeid_cast(info.get())) + const auto part_level_info = chunk.getChunkInfos().get(); + if (part_level_info) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 9e5c1249c4e..d0b0291511d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); + 
chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..770510232cc 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include #include #include +#include "Processors/Chunk.h" namespace Poco { @@ -14,11 +16,13 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfo +struct ChunkSelectFinalIndices : public ChunkInfoCloneable { + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); + ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; + const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index fbb47969b2f..b1b0182a113 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) + if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index c218f622870..be629271736 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) + if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index 30cf958c072..c05cc1defcb 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 5f9f9f9b1a1..146bd4505a4 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,10 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk.clone()); - if (!lastBlockIsDuplicate()) + setDeduplicationTokenForChildren(chunk); + fillDeduplicationTokenForChildren(chunk); + consume(chunk); + if (!lastBlockIsDuplicate()) // 
TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 023bbd8b094..07a944b0943 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,6 +1,10 @@ #pragma once +#include #include +#include #include +#include +#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -18,9 +22,38 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk chunk) = 0; + virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return false; } + virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + { + auto token_info = chunk.getChunkInfos().get(); + if (token_info) + return token_info; + + auto block_dedup_token_for_children = std::make_shared(""); + chunk.getChunkInfos().add(block_dedup_token_for_children); + return block_dedup_token_for_children; + } + + virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const + { + return chunk.getChunkInfos().get(); + } + + virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const + { + SipHash hash; + for (const auto & colunm: chunk.getColumns()) + { + colunm->updateHashFast(hash); + } + const auto hash_value = hash.get128(); + + chunk.getChunkInfos().get()->addTokenPart( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + } + private: std::vector table_locks; @@ -38,7 +71,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk) override {} + void consume(Chunk &) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index ec0dc9609f1..7ac460c14e2 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,7 +43,10 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - return Chunk(res.getColumns(), res.rows(), std::move(info)); + auto chunk = Chunk(res.getColumns(), res.rows()); + chunk.getChunkInfos().add(std::move(info)); + + return chunk; } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..1578bd389c9 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 00f40a34361..fb888c104c4 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -20,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp 
b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9ffe15d0f85..45b0960ec8f 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. - to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); + to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - finalizeChunk(chunk, aggregates_mask); - else if (!chunk.getChunkInfo()) { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); + finalizeChunk(chunk, aggregates_mask); + } + else if (!chunk.getChunkInfos().get()) + { + chunk.getChunkInfos().add(std::make_shared()); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 5d50e97f552..6433f862dfd 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -12,10 +13,12 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfo +struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable { + ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} + Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index b48d435720a..d6595ef9e9a 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -44,15 +44,11 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info; + return agg_info.get(); } /// Reads chunks from file in native format. Provide chunks with aggregation info. 
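
The info classes converted in these hunks (ChunkInfoWithAllocatedBytes, AggregatedChunkInfo, ChunkSelectFinalIndices and the rest) all move onto ChunkInfoCloneable and add a defaulted copy constructor. A minimal self-contained sketch of the CRTP idea this presumably relies on, with clone() implemented via the derived type's copy constructor; every name below apart from the pattern itself is illustrative and not taken from the patch:

    #include <memory>

    /// Sketch of a polymorphic-clone base: each derived info gets clone()
    /// implemented through its own copy constructor, which is why the patch
    /// adds "Type(const Type &) = default" to every converted info class.
    struct InfoBase
    {
        virtual ~InfoBase() = default;
        virtual std::shared_ptr<InfoBase> clone() const = 0;
    };

    template <typename Derived>
    struct CloneableInfo : public InfoBase
    {
        std::shared_ptr<InfoBase> clone() const override
        {
            return std::make_shared<Derived>(static_cast<const Derived &>(*this));
        }
    };

    /// Example payload, similar in spirit to SerialBlockNumberInfo.
    struct BlockNumberInfo : public CloneableInfo<BlockNumberInfo>
    {
        explicit BlockNumberInfo(size_t n) : block_number(n) {}
        BlockNumberInfo(const BlockNumberInfo &) = default;
        size_t block_number = 0;
    };

    int main()
    {
        BlockNumberInfo info(7);
        std::shared_ptr<InfoBase> copy = info.clone(); /// independent copy can travel with a cloned chunk
        return copy ? 0 : 1;
    }
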
@@ -210,11 +206,7 @@ private: void process(Chunk && chunk) { - if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); - - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index e167acde067..430a9a6e50a 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace CurrentMetrics { @@ -19,7 +21,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfo +class AggregatedChunkInfo : public ChunkInfoCloneable { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 0793bb3db5b..36aea045b18 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -38,10 +38,9 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_ static std::shared_ptr getSelectByFinalIndices(Chunk & chunk) { - if (auto select_final_indices_info = std::dynamic_pointer_cast(chunk.getChunkInfo())) + if (auto select_final_indices_info = chunk.getChunkInfos().extract()) { const auto & index_column = select_final_indices_info->select_final_indices; - chunk.setChunkInfo(nullptr); if (index_column && index_column->size() != chunk.getNumRows()) return select_final_indices_info; } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..ca204bcb482 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,10 +365,9 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - if (!data.chunk.hasChunkInfo()) + task = data.chunk.getChunkInfos().get(); + if (!task) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); - - task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.setChunkInfo(std::make_shared()); + chunk.getChunkInfos().add(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.setChunkInfo(task); + chunk.getChunkInfos().add(std::move(task)); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index a308af03662..5fdea2524e2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,5 +1,7 @@ #pragma once +#include #include +#include "Processors/Chunk.h" namespace DB @@ -111,11 +113,12 @@ private: }; -class DelayedBlocksTask : public ChunkInfo +class DelayedBlocksTask : public 
ChunkInfoCloneable { public: DelayedBlocksTask() = default; + DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index 607087fb39c..d7bc320173b 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,11 +150,7 @@ private: if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index fc40c6894bb..ea9ebb0f96e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_unique(std::move(chunks)); + info->chunks = std::make_shared(std::move(chunks)); Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); output.push(std::move(chunk)); } @@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + + auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); + chunk.getChunkInfos().add(std::move(chunk_info)); + + chunks_map[bucket].emplace_back(std::move(chunk)); } } } @@ -319,9 +322,7 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); - + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, 
"MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) + if (cur_chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (const auto * agg_info = typeid_cast(cur_info.get())) + if (auto agg_info = cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (typeid_cast(cur_info.get())) + else if (cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.setChunkInfo(std::move(res_info)); + chunk.getChunkInfos().add(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 77ee3034ffc..958b43b11ed 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,6 +2,7 @@ #include #include +#include "Processors/Chunk.h" #include #include #include @@ -142,9 +143,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfo +struct ChunksToMerge : public ChunkInfoCloneable { - std::unique_ptr chunks; + std::shared_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index ad723da7527..446e60a0b81 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) 
{ /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + + chunk.getChunkInfos().add(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp new file mode 100644 index 00000000000..61ff3f6bfd5 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -0,0 +1 @@ +#include diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h new file mode 100644 index 00000000000..ca990a925c1 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -0,0 +1,224 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ + struct SerialBlockNumberInfo : public ChunkInfoCloneable + { + SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; + explicit SerialBlockNumberInfo(size_t block_number_) + : block_number(block_number_) + { + } + + size_t block_number = 0; + }; + + + class NumberBlocksTransform : public ISimpleTransform + { + public: + explicit NumberBlocksTransform(const Block & header) + : ISimpleTransform(header, header, true) + { + } + + String getName() const override { return "NumberBlocksTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(block_number++)); + } + + private: + size_t block_number = 0; + }; + + + class DedupTokenInfo : public ChunkInfoCloneable + { + public: + DedupTokenInfo(const DedupTokenInfo & other) = default; + explicit DedupTokenInfo(String first_part) + { + addTokenPart(std::move(first_part)); + } + + String getToken() const + { + String result; + result.reserve(getTotalSize()); + + for (const auto & part : token_parts) + { + result.append(part); + } + + return result; + } + + void addTokenPart(String part) + { + token_parts.push_back(std::move(part)); + } + + private: + size_t getTotalSize() const + { + size_t size = 0; + for (const auto & part : token_parts) + size += part.size(); + return size; + } + + std::vector token_parts; + }; + + class AddUserDeduplicationTokenTransform : public ISimpleTransform + { + public: + AddUserDeduplicationTokenTransform(String token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , token(token_) + { + } + + String getName() const override { return "AddUserDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(token)); + } + + private: + String token; + }; + + + class CheckInsertDeduplicationTokenTransform : public ISimpleTransform + { + public: + CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return 
"CheckInsertDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + if (!must_be_present) + return; + + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), + "{}, token: {}", + debug, token_info->getToken()); + } + + private: + String debug; + bool must_be_present = false; + }; + + + class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform + { + public: + explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); + + auto block_number_info = chunk.getChunkInfos().get(); + if (!block_number_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); + + token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); + } + }; + + class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform + { + public: + explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); + + auto x = block_number++; + token_info->addTokenPart(fmt::format(":block-{}", x)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", x), token_info->getToken()); + } + private: + size_t block_number = 0; + }; + + class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform + { + public: + ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , token_part(token_part_) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); + + token_info->addTokenPart(fmt::format("{}", token_part)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), + "updated with {}, result: {}", + token_part, token_info->getToken()); + } + + private: + String token_part; + }; + +} diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index 480ab1a0f61..b44f5a3203e 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ 
b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); + auto select_final_indices_info = chunk.getChunkInfos().extract(); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,7 +41,6 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } - chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..4d693e5e809 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,14 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + if (cur_chunkinfos.empty()) + cur_chunkinfos = chunk.getChunkInfos(); + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) { cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } @@ -35,6 +40,8 @@ void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); finish_chunk.setColumns(block.getColumns(), block.rows()); + finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } void SquashingChunksTransform::work() @@ -65,7 +72,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (!finished) { if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + { chunk.setColumns(block.getColumns(), block.rows()); + chunk.setChunkInfos(chunk.getChunkInfos()); + } } else { diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..6de96d4100d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -25,6 +26,7 @@ protected: private: SquashingTransform squashing; Chunk cur_chunk; + Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..59fceccb538 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,11 +150,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. 
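
FilterTransform and SelectByIndicesTransform in the hunks above switch from getChunkInfo()/setChunkInfo(nullptr) to getChunkInfos().extract(), while most other call sites use get() and add(). A rough self-contained stand-in for that collection behaviour, assuming one info per concrete type, get() peeking and extract() removing; this is a toy model under those assumptions, not the real ChunkInfoCollection:

    #include <map>
    #include <memory>
    #include <typeindex>
    #include <typeinfo>

    struct InfoBase { virtual ~InfoBase() = default; };

    /// Toy collection keyed by concrete type: add() stores, get<T>() peeks,
    /// extract<T>() removes and returns, which is why SelectByIndicesTransform
    /// no longer needs an explicit setChunkInfo(nullptr).
    class InfoCollection
    {
    public:
        template <typename T>
        void add(std::shared_ptr<T> info) { infos[typeid(T)] = std::move(info); }

        template <typename T>
        std::shared_ptr<T> get() const
        {
            auto it = infos.find(typeid(T));
            return it == infos.end() ? nullptr : std::static_pointer_cast<T>(it->second);
        }

        template <typename T>
        std::shared_ptr<T> extract()
        {
            auto it = infos.find(typeid(T));
            if (it == infos.end())
                return nullptr;
            auto res = std::static_pointer_cast<T>(it->second);
            infos.erase(it);
            return res;
        }

        bool empty() const { return infos.empty(); }

    private:
        std::map<std::type_index, std::shared_ptr<InfoBase>> infos;
    };

    struct SelectionInfo : InfoBase { };

    int main()
    {
        InfoCollection infos;
        infos.add(std::make_shared<SelectionInfo>());
        auto peeked = infos.get<SelectionInfo>();    /// still present afterwards
        auto taken = infos.extract<SelectionInfo>(); /// removed from the collection
        return (peeked && taken && infos.empty()) ? 0 : 1;
    }
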
if (overflow_row) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 70f30faa5b1..056f8d07627 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,9 +24,12 @@ #include #include #include +#include "Processors/Chunk.h" +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include +#include namespace ProfileEvents @@ -120,6 +124,7 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; + Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -137,7 +142,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageLiveView & live_view; @@ -151,7 +156,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageWindowView & window_view; @@ -215,50 +220,6 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); - // Do not deduplicate insertions into MV if the main insertion is Ok - if (disable_deduplication_for_children) - { - insert_context->setSetting("insert_deduplicate", Field{false}); - } - else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - - /// TODO! - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. 
- * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); - - insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } - // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -364,12 +325,22 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); + out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + String addition_part = view_id.hasUUID() ? toString(view_id.uuid) : view_id.getFullNameNotQuoted(); + out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); + } + + out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -381,6 +352,8 @@ std::optional generateViewChain( table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); } + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -422,11 +395,20 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + out.addSource(std::make_shared(out.getInputHeader())); + } + auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); + + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -641,6 +623,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -743,6 +728,7 @@ void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); state.emplace(process(block, view, *views_data)); + state->chunk_infos = chunk.getChunkInfos(); } @@ -760,6 +746,9 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } + // here are we copy chunk_infos to the all chunks generated from the one consumed chunk + res.chunk.getChunkInfos().append(state->chunk_infos.clone()); + if (res.is_done) state.reset(); @@ -774,10 +763,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk chunk) +void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -797,11 +786,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk chunk) +void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index f0b2ead687e..a9e5b1535c0 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } + void addResources(QueryPlanResourceHolder 
resources_) { resources.append(std::move(resources_)); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index 2cd4dc42a83..bb2be2c8ffb 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo return *this; } +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +{ + append(std::move(rhs)); + return *this; +} + QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index ed9eb68b7ba..e40fa04f72c 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,9 +19,12 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
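
Per the comment added above, QueryPlanResourceHolder's move assignment now appends the right-hand side's resources to the ones already held instead of replacing them. A toy self-contained model of that semantic, reduced to a single illustrative member rather than the holder's real field list:

    #include <cassert>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    /// Toy holder: move-assignment appends rhs's resources instead of
    /// discarding those already held, as QueryPlanResourceHolder now does
    /// by delegating operator= to append().
    struct ResourceHolder
    {
        std::vector<std::string> table_locks;

        ResourceHolder & append(ResourceHolder && rhs) noexcept
        {
            table_locks.insert(
                table_locks.end(),
                std::make_move_iterator(rhs.table_locks.begin()),
                std::make_move_iterator(rhs.table_locks.end()));
            return *this;
        }

        ResourceHolder & operator=(ResourceHolder && rhs) noexcept { return append(std::move(rhs)); }
    };

    int main()
    {
        ResourceHolder lhs;
        lhs.table_locks = {"lock_a"};

        ResourceHolder rhs;
        rhs.table_locks = {"lock_b"};

        lhs = std::move(rhs);                 /// keeps lock_a and gains lock_b
        assert(lhs.table_locks.size() == 2);
        return 0;
    }
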
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..2e3096683d0 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk chunk) +void DistributedSink::consume(Chunk & chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,7 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized); + InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index a4c95633595..5b7396f2c6f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index abd4b4ce23b..6ca4ec6e079 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true); + InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0f3b03f0955..1ca7c1f71d0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -803,12 +803,12 @@ public: String getName() const override { return "HDFSSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 03a30d47d91..7b19dacb4c9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1098,7 +1098,7 @@ bool 
StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 792133ced64..9803fa0a160 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..57d75b969c3 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); + return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), + .chunk = std::move(chunk), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index fbb48b37482..8841f490e38 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -275,7 +275,10 @@ try ++it; } - return Chunk(std::move(res_columns), rows_read, add_part_level ? 
std::make_shared(data_part->info.level) : nullptr); + auto result = Chunk(std::move(res_columns), rows_read); + if (add_part_level) + result.getChunkInfos().add(std::make_shared(data_part->info.level)); + return result; } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index f0eb56aea13..2e455cd2bd5 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,8 +1,11 @@ #include #include +#include #include #include +#include "Common/Exception.h" #include +#include "Interpreters/StorageID.h" namespace ProfileEvents { @@ -56,7 +59,7 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } -void MergeTreeSink::consume(Chunk chunk) +void MergeTreeSink::consume(Chunk & chunk) { LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", num_blocks_processed, @@ -65,7 +68,7 @@ void MergeTreeSink::consume(Chunk chunk) if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -80,6 +83,30 @@ void MergeTreeSink::consume(Chunk chunk) size_t streams = 0; bool support_parallel_write = false; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info) + { + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + LOG_DEBUG(storage.log, + "dedup token from hash is calculated"); + } + } + for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -99,6 +126,11 @@ void MergeTreeSink::consume(Chunk chunk) current_block.block.clear(); current_block.partition.clear(); + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. 
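
MergeTreeSink above reads the accumulated DedupTokenInfo to form block_dedup_token and appends a ":block_hash-…" part for dependent views, while the earlier transforms contribute parts such as ":mv-…", ":block-…" and ":hash-…". A small self-contained model of how such a token concatenates from parts; the concrete part values below are made up for illustration:

    #include <iostream>
    #include <string>
    #include <vector>

    /// Toy token builder mirroring DedupTokenInfo::addTokenPart()/getToken():
    /// parts accumulate in insertion order and the final block id is derived
    /// from the concatenated result.
    class DedupToken
    {
    public:
        void addTokenPart(std::string part) { parts.push_back(std::move(part)); }

        std::string getToken() const
        {
            std::string result;
            for (const auto & part : parts)
                result += part;
            return result;
        }

    private:
        std::vector<std::string> parts;
    };

    int main()
    {
        DedupToken token;
        token.addTokenPart("user-token");         /// from insert_deduplication_token, if set
        token.addTokenPart(":mv-1234");           /// per-view part added before an inner MV insert
        token.addTokenPart(":block-0");           /// serial block number inside the insert
        token.addTokenPart(":block_hash-abcdef"); /// part hash appended by the sink
        std::cout << token.getToken() << '\n';    /// user-token:mv-1234:block-0:block_hash-abcdef
        return 0;
    }
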
if (!temp_part.part) @@ -107,19 +139,6 @@ void MergeTreeSink::consume(Chunk chunk) if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; - String block_dedup_token; - if (storage.getDeduplicationLog()) - { - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - } - size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -151,6 +170,7 @@ void MergeTreeSink::consume(Chunk chunk) partitions = DelayedPartitions{}; } + /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..4e1ca5c1f60 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; @@ -35,13 +35,13 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. struct DelayedChunk; std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2bb9aad1e53..ce140c93cbe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -253,12 +254,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); const auto & settings = context->getSettingsRef(); @@ -284,13 +285,40 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + const auto async_insert_info_ptr = chunk.getChunkInfos().get(); + if (async_insert_info_ptr) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } + String block_dedup_token; + if constexpr (!async_insert) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && 
!context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + + if (token_info) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + LOG_DEBUG(storage.log, + "dedup token from hash is calculated"); + } + } + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -342,23 +370,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) } else { - if (deduplicate) { - String block_dedup_token; - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. - - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -366,6 +381,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } + + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } } profile_events_scope.reset(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..b1eff67d845 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -139,6 +139,7 @@ private: /// We can delay processing for previous chunk and start writing a new one. 
std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 4fb81d69070..36899011e33 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk chunk) +void MessageQueueSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index b3c1e61734f..4a9248c6c4d 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 0b88a9e8929..9c6d70f2c5b 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 09b009b26d8..ee2570756ed 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk chunk) +void PartitionedSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(std::move(partition_index_to_chunk[partition_index])); + sink->consume(partition_index_to_chunk[partition_index]); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index 68edeb6fd73..fcd67556dc9 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index ba3cc6f58d0..57c8d24ccc2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true); + InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, 
false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 2bb1e2dde0d..4a5a621aa43 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context); + InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e4b19992151..5bf5ab9b2f5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c451cfd1bf5..1f7f6939f40 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk chunk) +void EmbeddedRocksDBSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 011322df829..2e1e0c7b429 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 01417b8977b..47e1b8feb43 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -310,7 +310,8 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c3a772e532c..12abd7a9849 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -498,7 +498,7 @@ bool StorageS3Queue::streamToViews() // Create 
a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 9c551e82a99..5dc407bf86d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -600,12 +600,12 @@ public: String getName() const override { return "StorageAzureBlobSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..d4defd92196 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -605,7 +605,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1018,7 +1018,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; + InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7b5916c0273..0478936fdfc 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1047,7 +1047,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context); + InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 76d75a368b3..581e0f87f15 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1770,12 +1770,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..c80e799a92b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto rows = chunk.getNumRows(); - auto block = 
getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,7 +1248,10 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - sink->consume(Chunk{block.getColumns(), block.rows()}); + { + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); + } sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25c48de94e1..fad31e8ae03 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -337,7 +337,7 @@ public: } } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -388,9 +388,9 @@ private: }; -void LogSink::consume(Chunk chunk) +void LogSink::consume(Chunk & chunk) { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..b1bd7053c2e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 62a2a048642..e0818fafae9 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,12 +106,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..2a8a7bd2ee7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 9379cb5a1c6..c99de3e3588 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -163,9 +163,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 
83bb3c606c9..1a275320f43 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk chunk) +void RedisSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,7 +567,8 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + Chunk chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9768653f3fe..7975b42ac02 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1100,12 +1100,12 @@ public: String getName() const override { return "StorageS3Sink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 179e4cee199..85417a2f2a4 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..4105e8decd3 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk chunk) +void SetOrJoinSink::consume(Chunk & chunk) { - Block block = getHeader().cloneWithColumns(chunk.detachColumns()); + Block block = getHeader().cloneWithColumns(chunk.getColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 48389dccf48..7fa5a5670a3 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -217,9 +217,9 @@ public: } } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8a71a771367..c21d24ac2e5 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -559,12 +559,12 @@ 
StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk chunk) +void StorageURLSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5aca3df1513..e90585c79ca 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 7afa1894a64..d295bebe615 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a9ec1f6c694..e0f3b437af7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -305,7 +305,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,7 +690,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext()); + InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 3cd29247910..87c48a73513 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -49,19 +49,40 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin return __format(template, **params) -def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", - insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", - ) +def instance_insert_statement(table_name, count, insert_method, 
insert_unique_blocks, use_insert_token): + insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + + if insert_method == 'InsertSelect': + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings=insert_settings, + ) + + else: + template = """ + INSERT INTO {table_name} + {insert_settings} VALUES {insert_values}; + """ + + values = [] + for i in range(count): + values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + insert_values = ", ".join(values) + + return __format( + template, + table_name=table_name, + insert_settings=insert_settings, + insert_values=insert_values, + ) def get_drop_tables_statements(tables): @@ -109,6 +130,10 @@ class ArgsFactory: def add_opt_uniq_blocks(self): self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + def add_opt_insert_method(self): + self.__parser.add_argument( + "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + def add_all(self): self.add_opt_engine() self.add_opt_user_token() @@ -116,6 +141,7 @@ class ArgsFactory: self.add_opt_dedup_src() self.add_opt_dedup_dst() self.add_opt_get_logs() + self.add_opt_insert_method() self.add_opt_uniq_blocks() @@ -151,14 +177,14 @@ def test_insert_several_blocks(parser): drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) print_details_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - + SELECT 'table_when_b_even'; SELECT 'count', count() FROM table_when_b_even; {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} @@ -209,37 +235,37 @@ def test_insert_several_blocks(parser): script = f""" {get_logs_statement(args)} - + SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_a_b_statement} - + {create_table_when_b_even_statement} - + {create_mv_statement} - + -- first insert {insert_statement} - + {print_details_statements} - + {assert_first_insert_statements} - + -- second insert, it is retry {insert_statement} - + {print_details_statements} - + {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -279,13 +305,13 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; - + SELECT 'table_when_b_even_and_joined'; SELECT 'count', count() FROM table_when_b_even_and_joined; {"" if not args.get_logs else "SELECT _part, a_src, 
a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} @@ -295,7 +321,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != 5 ) FROM table_a_b; - + SELECT throwIf( count() != 47 ) FROM table_when_b_even_and_joined; """ @@ -311,7 +337,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ @@ -326,14 +352,14 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) FROM table_when_b_even_and_joined; """ @@ -344,13 +370,13 @@ def test_mv_generates_several_blocks(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + CREATE TABLE table_for_join_with (a_join String, b UInt64) ENGINE = MergeTree() @@ -359,13 +385,13 @@ def test_mv_generates_several_blocks(parser): SELECT 'joined_' || toString(number), number FROM numbers(9); {details_print_for_table_for_join_with} - + {create_table_a_b_statement} SYSTEM STOP MERGES table_a_b; - + {create_table_when_b_even_and_joined_statement} SYSTEM STOP MERGES table_when_b_even_and_joined; - + CREATE MATERIALIZED VIEW mv_b_even TO table_when_b_even_and_joined AS @@ -377,20 +403,20 @@ def test_mv_generates_several_blocks(parser): -- first insert {insert_statement} - + {details_print_statements} - + -- first assertion {assert_first_insert_statements} - + -- second insert {insert_statement} - + {details_print_statements} - + -- second assertion {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -423,12 +449,12 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_unique_blocks, args.use_insert_token + "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_src count', count() FROM table_src; - + SELECT 'table_dst count', count() FROM table_dst; {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} """ @@ -453,7 +479,7 @@ def test_several_mv_into_one_table(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) FROM table_src; - + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ @@ -469,7 +495,7 @@ def test_several_mv_into_one_table(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) FROM table_src; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ assert_second_insert_statements = f""" @@ -486,13 +512,13 @@ def 
test_several_mv_into_one_table(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_src_statement} {create_table_dst_statement} @@ -503,7 +529,7 @@ def test_several_mv_into_one_table(parser): SELECT a, b FROM table_src WHERE b % 2 = 0; - + CREATE MATERIALIZED VIEW mv_b_even_even TO table_dst AS @@ -515,16 +541,16 @@ def test_several_mv_into_one_table(parser): {insert_statement} {details_print_statements} - + {assert_first_insert_statements} -- second insert, retry {insert_statement} - + {details_print_statements} {assert_second_insert_statements} - + {drop_tables_statements} """ diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 35b2642a4d2..9b4738ce805 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -14,7 +14,7 @@ count 5 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -29,7 +29,7 @@ count 10 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -44,7 +44,7 @@ count 10 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -59,7 +59,7 @@ count 20 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -74,7 +74,7 @@ count 5 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -89,7 +89,7 @@ count 10 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -104,7 +104,7 @@ count 10 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -119,35 +119,35 @@ count 20 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -155,7 +155,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -163,7 +163,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -178,7 +178,7 @@ count 10 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -193,7 +193,7 @@ count 20 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -208,7 +208,7 @@ count 5 0 OK -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -223,7 +223,7 @@ count 1 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -238,7 +238,7 @@ count 10 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -253,7 +253,7 @@ count 20 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -268,7 +268,7 @@ count 5 0 OK -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -283,7 +283,7 @@ count 1 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -298,7 +298,7 @@ count 10 0 OK -Test case 23: engine=MergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -313,7 +313,7 @@ count 20 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -328,7 +328,7 @@ count 5 0 OK -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -343,7 +343,7 @@ count 1 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -358,7 +358,7 @@ count 10 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -373,7 +373,7 @@ count 20 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -388,7 +388,7 @@ count 5 0 OK -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -403,7 +403,7 @@ count 1 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -418,7 +418,7 @@ count 10 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -433,7 +433,7 @@ count 20 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -448,7 +448,7 @@ count 5 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -463,7 +463,7 @@ count 10 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -478,7 +478,7 @@ count 10 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -493,7 +493,7 @@ count 20 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -508,7 +508,7 @@ count 5 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -523,7 +523,7 @@ count 10 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -538,7 +538,7 @@ count 10 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -553,35 +553,35 @@ count 20 0 OK -Test case 
40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -589,7 +589,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -597,7 +597,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -612,7 +612,7 @@ count 10 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -627,7 +627,7 @@ count 20 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -642,7 +642,7 @@ count 5 0 OK -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -657,7 +657,7 @@ count 1 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -672,7 +672,7 @@ count 10 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -687,7 +687,7 @@ count 20 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -702,7 +702,7 @@ count 5 0 OK -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -717,7 +717,7 @@ count 1 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -732,7 +732,7 @@ count 10 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -747,7 +747,7 @@ count 20 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -762,7 +762,7 @@ count 5 0 OK -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -777,7 +777,7 @@ count 1 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -792,7 +792,7 @@ count 10 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -807,7 +807,7 @@ count 20 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -822,7 +822,7 @@ count 5 0 OK -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -837,7 +837,7 @@ count 1 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -852,7 +852,967 @@ count 10 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 65: 
insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 
+table_when_b_even +count 5 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 
+table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 123: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh index 5b07f6033ad..ed50110b7eb 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -15,7 +15,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # fails, it is a error. The same situation as first one, but on dst table. 
RUN_ONLY="" -#RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" KNOWN_ERRORS=(8 9 10 11 12 13) @@ -23,7 +23,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -31,56 +31,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table 
$deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index eccdbd52f37..4411bdecea8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -14,7 +14,7 @@ count 47 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -29,7 +29,7 @@ count 45 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -44,7 +44,7 @@ count 94 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -59,7 +59,7 @@ count 90 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -74,7 +74,7 @@ count 47 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 
table_when_b_even_and_joined @@ -89,7 +89,7 @@ count 45 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -104,7 +104,7 @@ count 94 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -119,35 +119,35 @@ count 90 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -155,7 +155,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -163,7 +163,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True 
+Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -178,7 +178,7 @@ count 94 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -193,7 +193,7 @@ count 90 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -201,7 +201,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -216,7 +216,7 @@ count 9 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -231,7 +231,7 @@ count 94 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -246,7 +246,7 @@ count 90 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -254,7 +254,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -269,7 +269,7 @@ count 9 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -284,7 +284,7 @@ count 94 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -299,7 +299,7 @@ count 90 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -307,7 +307,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -322,7 +322,7 @@ count 9 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -337,7 +337,7 @@ count 94 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -352,7 +352,7 @@ count 90 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -360,7 +360,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -375,7 +375,7 @@ count 9 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -390,7 +390,7 @@ count 94 0 OK -Test case 31: engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -405,7 +405,7 @@ count 90 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -420,7 +420,7 @@ count 47 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -435,7 +435,7 @@ count 45 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -450,7 +450,7 @@ count 94 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -465,7 +465,7 @@ count 90 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -480,7 +480,7 @@ count 47 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -495,7 +495,7 @@ count 45 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -510,7 +510,7 @@ count 94 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -525,35 +525,35 @@ count 90 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -561,7 +561,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -569,7 +569,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -584,7 +584,7 @@ count 94 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -599,7 +599,7 @@ count 90 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -607,7 +607,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -622,7 +622,7 @@ count 9 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -637,7 +637,7 @@ count 94 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -652,7 +652,7 @@ count 90 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -660,7 +660,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -675,7 +675,7 @@ count 9 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -690,7 +690,7 @@ count 94 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -705,7 +705,7 @@ count 90 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -713,7 +713,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -728,7 +728,7 @@ count 9 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -743,7 +743,7 @@ count 94 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -758,7 +758,7 @@ count 90 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -766,7 +766,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -781,7 +781,7 @@ count 9 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -796,7 +796,911 @@ count 94 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 
+0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 
+table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh index 1dd648583c6..61996905135 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -29,7 +29,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ 
"$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -37,56 +37,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ 
+ --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index 12eea604e3a..a56f7deb744 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -10,7 +10,7 @@ table_dst count 6 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -21,7 +21,7 @@ table_dst count 16 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -32,7 +32,7 @@ table_dst count 12 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -43,7 +43,7 @@ table_dst count 32 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -54,7 +54,7 @@ table_dst count 6 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -65,7 +65,7 @@ table_dst count 16 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -76,7 +76,7 @@ 
table_dst count 12 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -87,39 +87,39 @@ table_dst count 32 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -130,7 +130,7 @@ table_dst count 12 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False 
table_src count 8 table_dst count 16 0 @@ -141,13 +141,13 @@ table_dst count 32 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -157,7 +157,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -168,7 +168,7 @@ table_dst count 12 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -179,13 +179,13 @@ table_dst count 32 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -195,7 +195,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -206,7 +206,7 @@ table_dst count 12 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -217,13 +217,13 @@ table_dst count 32 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -233,7 +233,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -244,7 +244,7 @@ table_dst count 12 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -255,13 +255,13 @@ table_dst count 32 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -271,7 +271,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -282,7 +282,7 @@ table_dst count 12 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -293,7 +293,7 @@ table_dst count 32 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -304,7 +304,7 @@ table_dst count 6 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -315,7 +315,7 @@ table_dst count 16 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -326,7 +326,7 @@ table_dst count 12 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -337,7 +337,7 @@ table_dst count 32 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -348,7 +348,7 @@ table_dst count 6 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -359,7 +359,7 @@ table_dst count 16 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -370,7 +370,7 @@ table_dst count 12 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -381,39 +381,39 @@ table_dst count 32 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 
table_dst count 2 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -424,7 +424,7 @@ table_dst count 12 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -435,13 +435,13 @@ table_dst count 32 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -451,7 +451,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -462,7 +462,7 @@ table_dst count 12 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -473,13 +473,13 @@ table_dst count 32 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -489,7 +489,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -500,7 +500,7 @@ table_dst count 12 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -511,13 +511,13 @@ table_dst count 32 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -527,7 +527,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -538,7 +538,7 @@ table_dst count 12 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -549,13 +549,13 @@ table_dst count 32 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -565,7 +565,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -576,7 +576,663 @@ table_dst count 12 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst 
count 12 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 
32 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 
+0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh index 487b3ac5f88..3d2814ed77d 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -32,7 +32,7 @@ function is_known_error() { n=$1 for e in 
"${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -40,59 +40,64 @@ function is_known_error() } RUN_ONLY="" -#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread 
\ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done From ac27860b49c45cb0861d3989171be493140b0eb5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Apr 2024 15:58:25 +0000 Subject: [PATCH 062/439] Automatic style fix --- src/Processors/Sinks/SinkToStorage.cpp | 3 +- src/Processors/Sinks/SinkToStorage.h | 21 +-- .../Transforms/NumberBlocksTransform.h | 9 +- src/Storages/MergeTree/MergeTreeSink.cpp | 9 +- src/Storages/MergeTree/MergeTreeSink.h | 1 - .../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +- .../MergeTree/ReplicatedMergeTreeSink.h | 1 - .../0_stateless/03008_deduplication.python | 132 ++++++++++++++---- 8 files changed, 128 insertions(+), 56 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 146bd4505a4..fff4a881e3d 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,9 +15,8 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - setDeduplicationTokenForChildren(chunk); - fillDeduplicationTokenForChildren(chunk); consume(chunk); + fillDeduplicationTokenForChildren(chunk); if (!lastBlockIsDuplicate()) // TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 07a944b0943..21e003c4317 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -25,24 +25,12 @@ protected: virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return false; } - virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + void fillDeduplicationTokenForChildren(Chunk & chunk) const { auto token_info = chunk.getChunkInfos().get(); if (token_info) - return token_info; + return; - auto block_dedup_token_for_children = std::make_shared(""); - chunk.getChunkInfos().add(block_dedup_token_for_children); - return block_dedup_token_for_children; - } - - virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const - { - return chunk.getChunkInfos().get(); - } - - virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const - { SipHash hash; for (const auto & colunm: chunk.getColumns()) { @@ -50,8 +38,9 @@ protected: } const auto hash_value = hash.get128(); - chunk.getChunkInfos().get()->addTokenPart( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + chunk.getChunkInfos().add(std::make_shared( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) + )); } private: diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index ca990a925c1..9bc23a583d3 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h 
+++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -49,6 +49,7 @@ namespace DB class DedupTokenInfo : public ChunkInfoCloneable { public: + DedupTokenInfo() = default; DedupTokenInfo(const DedupTokenInfo & other) = default; explicit DedupTokenInfo(String first_part) { @@ -68,9 +69,15 @@ namespace DB return result; } + bool empty() const + { + return token_parts.empty(); + } + void addTokenPart(String part) { - token_parts.push_back(std::move(part)); + if (!part.empty()) + token_parts.push_back(std::move(part)); } private: diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2e455cd2bd5..ce7833d25da 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -84,6 +85,7 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if (storage.getDeduplicationLog()) { auto token_info = chunk.getChunkInfos().get(); @@ -102,6 +104,9 @@ void MergeTreeSink::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); + LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -126,9 +131,9 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } /// If optimize_on_insert setting is true, current_block could become empty after merge diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 4e1ca5c1f60..8f065773d6a 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -41,7 +41,6 @@ private: struct DelayedChunk; std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index ce140c93cbe..1712170dddd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -293,6 +294,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if constexpr (!async_insert) { auto token_info = chunk.getChunkInfos().get(); @@ -314,6 +316,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -382,9 +386,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + 
dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index b1eff67d845..e460804d7f1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -139,7 +139,6 @@ private: /// We can delay processing for previous chunk and start writing a new one. std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 87c48a73513..89dbea97667 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -22,7 +22,14 @@ def __format(template, **params): return template.format(**kv_args) -def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): +def instance_create_statement( + table_name, + table_columns, + table_keys, + table_engine, + with_deduplication, + no_merges=True, +): template = """ CREATE TABLE {table_name} {table_columns} @@ -37,22 +44,36 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin params["table_columns"] = table_columns params["table_keys"] = table_keys params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + params["table_engine"] = ( + "MergeTree()" + if table_engine == "MergeTree" + else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + ) - deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_name = ( + "non_replicated_deduplication_window" + if table_engine == "MergeTree" + else "replicated_deduplication_window" + ) deduplication_window_setting_value = 1000 if with_deduplication else 0 settings = list() - settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + settings += [ + f"{deduplication_window_setting_name}={deduplication_window_setting_value}" + ] params["table_settings"] = "SETTINGS " + ",".join(settings) return __format(template, **params) -def instance_insert_statement(table_name, count, insert_method, insert_unique_blocks, use_insert_token): - insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" +def instance_insert_statement( + table_name, count, insert_method, insert_unique_blocks, use_insert_token +): + insert_settings = ( + "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + ) - if insert_method == 'InsertSelect': + if insert_method == "InsertSelect": template = """ INSERT INTO {table_name} SELECT {insert_columns} @@ -62,7 +83,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl template, table_name=table_name, count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_columns="'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number", 
insert_settings=insert_settings, ) @@ -74,7 +97,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl values = [] for i in range(count): - values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + values += ( + [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + ) insert_values = ", ".join(values) return __format( @@ -86,7 +111,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl def get_drop_tables_statements(tables): - return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + return "".join( + [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] + ) def get_logs_statement(args): @@ -94,15 +121,17 @@ def get_logs_statement(args): return "SET send_logs_level='test';" return "" + def str2bool(v): if isinstance(v, bool): return v - if v.lower() in ('yes', 'true', 't', 'y', '1'): + if v.lower() in ("yes", "true", "t", "y", "1"): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + elif v.lower() in ("no", "false", "f", "n", "0"): return False else: - raise argparse.ArgumentTypeError('Boolean value expected.') + raise argparse.ArgumentTypeError("Boolean value expected.") + class ArgsFactory: def __init__(self, parser): @@ -110,29 +139,55 @@ class ArgsFactory: def add_opt_engine(self): self.__parser.add_argument( - "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], default="MergeTree") + "--table-engine", + choices=["ReplicatedMergeTree", "MergeTree"], + default="MergeTree", + ) def add_opt_user_token(self): - self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--use-insert-token", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_single_thread(self): - self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--single-thread", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_dedup_src(self): - self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-src-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_dedup_dst(self): - self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-dst-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_get_logs(self): - self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--get-logs", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_uniq_blocks(self): - self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_insert_method(self): self.__parser.add_argument( - "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + "--insert-method", + choices=["InsertSelect", "InsertValues"], + default="InsertSelect", + ) def add_all(self): self.add_opt_engine() @@ -174,10 +229,16 @@ def test_insert_several_blocks(parser): WHERE b % 2 = 0; """ - drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + 
drop_tables_statements = get_drop_tables_statements( + ["table_a_b", "table_when_b_even", "mv_b_even"] + ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 10, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) print_details_statements = f""" @@ -190,8 +251,6 @@ def test_insert_several_blocks(parser): {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} """ - - if args.insert_unique_blocks: assert_first_insert_statements = f""" SELECT throwIf( count() != 10 ) @@ -278,7 +337,12 @@ def test_mv_generates_several_blocks(parser): ArgsFactory(parser).add_all() def calle(args): - tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + tables = [ + "table_for_join_with", + "table_a_b", + "table_when_b_even_and_joined", + "mv_b_even", + ] drop_tables_statements = get_drop_tables_statements(tables) details_print_for_table_for_join_with = "" @@ -305,7 +369,11 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 5, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) details_print_statements = f""" @@ -449,7 +517,11 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_src", + 8, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) details_print_statements = f""" @@ -568,9 +640,7 @@ def parse_args(): test_mv_generates_several_blocks( subparsers.add_parser("mv_generates_several_blocks") ) - test_several_mv_into_one_table( - subparsers.add_parser("several_mv_into_one_table") - ) + test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) args = parser.parse_args() if args.test is None: parser.print_help() From c7908f62d056c5a96f0bea743de48e48399c0f91 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:06:04 +0200 Subject: [PATCH 063/439] fix sigfault --- src/Storages/MergeTree/MergeTreeSink.cpp | 11 ++++++----- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ce7833d25da..7ca6ed10a76 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -131,16 +131,17 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (dedub_token_info_for_children) - { - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); - } - /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. 
if (!temp_part.part) continue; + if (dedub_token_info_for_children) + { + chassert(temp_part.part); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 1712170dddd..3c1e2bc9219 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -388,6 +388,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (dedub_token_info_for_children) { + chassert(temp_part.part); dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } From 687b5940fa37563f2121ab2f915edd513a2c8b6e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:45:48 +0200 Subject: [PATCH 064/439] fix style --- src/Interpreters/InterpreterCreateQuery.cpp | 6 ++---- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++++-- src/QueryPipeline/QueryPlanResourceHolder.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++++++- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a143ca867e1..35ed6c9ab69 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1691,14 +1691,12 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert->select = create.select->clone(); return InterpreterInsertQuery( - insert, + insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, false, false, - false - ) - .execute(); + false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 40d5a84031d..62bffcfe6a1 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -592,10 +592,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(in_header); }); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index e40fa04f72c..10f7f39ab09 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,7 +19,7 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. 
diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7ca6ed10a76..2d29f87c556 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -13,6 +13,11 @@ namespace ProfileEvents extern const Event DuplicatedInsertedBlocks; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -108,7 +113,7 @@ void MergeTreeSink::consume(Chunk & chunk) chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3c1e2bc9219..e855bb7d969 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -319,7 +319,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) dedub_token_info_for_children = std::make_shared(); chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } From f1493a40a4b70251b12bc06bdd43bbd9eeadfe8b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 15:44:30 +0200 Subject: [PATCH 065/439] fix tests --- src/Interpreters/AsynchronousInsertQueue.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 65035790729..e1595243ae3 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -780,7 +780,12 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); + key.query, + insert_context, + key.settings.insert_allow_materialized_columns, + false, + false, + true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); From fa667b454366c724dcfa59872de4d89c987fa5bd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 18:21:32 +0200 Subject: [PATCH 066/439] fix tests --- src/Interpreters/InterpreterCheckQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index e070d8694a7..81bb6290acb 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Processors/Chunk.h" #include #include @@ -24,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 62bffcfe6a1..bbff38a06bf 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -682,6 +682,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline() chain.addSource(std::move(counting)); QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); pipeline.setConcurrencyControl(settings.use_concurrency_control); @@ 
-735,7 +736,11 @@ BlockIO InterpreterInsertQuery::execute() { if (settings.parallel_distributed_insert_select) { - res.pipeline = *table->distributedWrite(query, getContext()); + auto distributed = table->distributedWrite(query, getContext()); + if (distributed) + res.pipeline = std::move(*distributed); + else + res.pipeline = buildInsertSelectPipeline(); } else { From 2d4216ecada4593ba43af441c6fd44f694e1867d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 May 2024 13:00:57 +0200 Subject: [PATCH 067/439] debugging --- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++++++++++ src/Storages/StorageDistributed.cpp | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index bbff38a06bf..435c616f27c 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -499,6 +499,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } + + // auto resources = QueryPlanResourceHolder(); + // resources.interpreter_context.push_back(select_context); + // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); @@ -710,6 +714,12 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + if (auto * dist_storage = dynamic_cast(table.get())) + { + LOG_DEBUG(getLogger("InsertQuery"), + "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + } + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0478936fdfc..1ecb83aa120 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,6 +23,7 @@ #include +#include "Common/logger_useful.h" #include #include #include @@ -106,6 +107,7 @@ #include #include +#include #include #include #include @@ -1822,10 +1824,14 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { + LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); + if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; + LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); + UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From db6951e48897d297f443dfb85f8da76add6e4f0d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 May 2024 15:43:49 +0200 Subject: [PATCH 068/439] fix distributed inserts --- src/Common/CollectionOfDerived.h | 14 ++++++++++ src/Interpreters/InterpreterInsertQuery.cpp | 26 ++++++++++++------- src/Interpreters/InterpreterInsertQuery.h | 4 +-- .../FinishAggregatingInOrderAlgorithm.cpp | 11 +++++--- src/Storages/StorageDistributed.cpp | 4 --- 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 8579c4dd50c..c98e375b4b1 100644 --- 
a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -109,6 +110,19 @@ public: return cast; } + std::string debug() const + { + std::string result; + + for (auto & rec : records) + { + result.append(rec.type_idx.name()); + result.append(" "); + } + + return result; + } + private: bool isUniqTypes() const { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 435c616f27c..fdf77486c85 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -429,12 +429,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP } -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -641,12 +639,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() } -QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -714,9 +710,11 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + bool is_table_dist = false; if (auto * dist_storage = dynamic_cast(table.get())) { - LOG_DEBUG(getLogger("InsertQuery"), + is_table_dist = true; + LOG_DEBUG(getLogger("InsertQuery"), "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); } @@ -748,18 +746,26 @@ BlockIO InterpreterInsertQuery::execute() { auto distributed = table->distributedWrite(query, getContext()); if (distributed) + { + LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); + } else - res.pipeline = buildInsertSelectPipeline(); + { + LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); + } } else { - res.pipeline = buildInsertSelectPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); } } else { - res.pipeline = buildInsertPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertPipeline(query, table); } res.pipeline.addStorageHolder(table); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 3f3b7a6f106..b06bb9a3db2 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -81,8 +81,8 @@ private: std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, 
size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - QueryPipeline buildInsertSelectPipeline(); - QueryPipeline buildInsertPipeline(); + QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); + QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); Chain buildSink( const StoragePtr & table, diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index f33cc267c44..ae47de4a81e 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,11 +51,14 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & arenas_info = input.chunk.getChunkInfos().get(); - if (!arenas_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); + if (input.chunk.getChunkInfos().empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); - states[source_num] = State{input.chunk, description, arenas_info->allocated_bytes}; + Int64 allocated_bytes = 0; + if (auto arenas_info = input.chunk.getChunkInfos().get()) + allocated_bytes = arenas_info->allocated_bytes; + + states[source_num] = State{input.chunk, description, allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1ecb83aa120..747bb2c2080 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1824,14 +1824,10 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { - LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); - if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; - LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); - UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From 7fe1fe11b9c59eda01cb20f506f3c3e95398668d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 3 May 2024 22:43:47 +0200 Subject: [PATCH 069/439] fixing tests --- src/Columns/ColumnObject.cpp | 6 +++ src/Columns/ColumnObject.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++--- src/Interpreters/SquashingTransform.cpp | 20 ++++----- src/Interpreters/SquashingTransform.h | 11 +++-- src/Processors/ISimpleTransform.h | 4 ++ .../Transforms/CountingTransform.cpp | 3 ++ src/Processors/Transforms/CountingTransform.h | 4 ++ .../Transforms/ExpressionTransform.cpp | 7 ++++ .../Transforms/MaterializingTransform.cpp | 6 +++ .../Transforms/NumberBlocksTransform.h | 20 +++++++++ .../Transforms/SquashingChunksTransform.cpp | 42 +++++++++++++++---- .../Transforms/SquashingChunksTransform.h | 1 + .../Transforms/buildPushingToViewsChain.cpp | 25 ++++++----- src/Server/TCPHandler.cpp | 6 +-- src/Storages/MergeTree/MutateTask.cpp | 9 ++-- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 18 files changed, 129 insertions(+), 51 
deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010c..ded56b60e64 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,4 +1093,10 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994..b1b8827622f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } + void updateHashFast(SipHash & hash) const override; void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fdf77486c85..2961d643869 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -471,8 +471,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (table->prefersLargeBlocks()) { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; } auto context_for_trivial_select = Context::createCopy(context); @@ -497,10 +499,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } - - // auto resources = QueryPlanResourceHolder(); - // resources.interpreter_context.push_back(select_context); - // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 41f024df7a7..cf4f2060414 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -15,12 +15,12 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b { } -Block SquashingTransform::add(Block && input_block) +SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) { return addImpl(std::move(input_block)); } -Block SquashingTransform::add(const Block & input_block) +SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) { return addImpl(input_block); } @@ -32,14 +32,14 @@ Block SquashingTransform::add(const Block & input_block) * have to. 
*/ template -Block SquashingTransform::addImpl(ReferenceType input_block) +SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. if (!input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } /// Just read block is already enough. @@ -48,13 +48,13 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// If no accumulated data, return just read block. if (!accumulated_block) { - return std::move(input_block); + return SquashResult{std::move(input_block), false}; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } /// Accumulated block is already enough. @@ -63,7 +63,7 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// Return accumulated data and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } append(std::move(input_block)); @@ -71,11 +71,11 @@ Block SquashingTransform::addImpl(ReferenceType input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } - /// Squashed block is not ready. - return {}; + /// Squashed block is not ready, input block consumed + return SquashResult{{}, true}; } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index b04d012bcd1..f1eba537338 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -25,11 +25,16 @@ public: /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + struct SquashResult + { + Block block; + bool input_block_delayed = false; + }; /** Add next block and possibly returns squashed block. * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. 
*/ - Block add(Block && block); - Block add(const Block & block); + SquashResult add(Block && block); + SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -38,7 +43,7 @@ private: Block accumulated_block; template - Block addImpl(ReferenceType block); + SquashResult addImpl(ReferenceType block); template void append(ReferenceType block); diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 629529cdffa..3862ea76dbb 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,6 +2,8 @@ #include +#include + namespace DB { @@ -29,6 +31,8 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { + LOG_DEBUG(getLogger("ISimpleTransform"), + "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 3dfb9fe178f..7329a196f8a 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -17,6 +17,9 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("CountingTransform"), + "onConsume {}", chunk.getNumRows()); + if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 05d8e2aeac8..ab8d083fd05 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { @@ -43,6 +45,8 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { + LOG_DEBUG(getLogger("CountingTransform"), + "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 2fbd2c21b8d..db5d2b0c49c 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,5 +1,9 @@ #include #include + +#include + + namespace DB { @@ -17,6 +21,9 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("ExpressionTransform"), + "transform {}", chunk.getNumRows()); + size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 1eaa5458d37..8366472f876 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,6 +1,9 @@ #include #include +#include + + namespace DB { @@ -9,6 +12,9 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("MaterializingTransform"), + "transform {}", chunk.getNumRows()); + auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 9bc23a583d3..6586f015d3e 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -228,4 +228,24 @@ namespace DB 
String token_part; }; + class RestoreChunkInfosTransform : public ISimpleTransform + { + public: + RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , chunk_infos(chunk_infos_) + { + } + + String getName() const override { return "RestoreChunkInfosTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().append(chunk_infos.clone()); + } + + private: + Chunk::ChunkInfoCollection chunk_infos; + }; + } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 4d693e5e809..7464cb79ba6 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,15 +17,24 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "onConsume {}", chunk.getNumRows()); + if (cur_chunkinfos.empty()) cur_chunkinfos = chunk.getChunkInfos(); - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); + } } SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() @@ -38,8 +47,8 @@ SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() void SquashingChunksTransform::onFinish() { - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + finish_chunk.setColumns(result.block.getColumns(), result.block.rows()); finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } @@ -69,12 +78,25 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "transform {}", chunk.getNumRows()); + if (!finished) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + if (cur_chunkinfos.empty()) + cur_chunkinfos = chunk.getChunkInfos(); + + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - chunk.setColumns(block.getColumns(), block.rows()); - chunk.setChunkInfos(chunk.getChunkInfos()); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; + } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); } } else @@ -82,8 +104,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } diff --git 
a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 6de96d4100d..116b9e47460 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -45,6 +45,7 @@ protected: private: SquashingTransform squashing; + Chunk::ChunkInfoCollection cur_chunkinfos; /// When consumption is finished we need to release the final chunk regardless of its size. bool finished = false; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 056f8d07627..ccecfcf3333 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -124,7 +124,6 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; - Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -397,10 +396,13 @@ std::optional generateViewChain( { out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - if (!disable_deduplication_for_children) - { - out.addSource(std::make_shared(out.getInputHeader())); - } + // if (!disable_deduplication_for_children) + // { + // // out.addSource(std::make_shared(out.getInputHeader())); + // // out.addSource(std::make_shared(out.getInputHeader())); + + // out.addSource(std::make_shared(out.getInputHeader())); + // } auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); @@ -576,7 +578,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) { const auto & context = views_data.context; @@ -623,8 +625,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -727,8 +730,7 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data)); - state->chunk_infos = chunk.getChunkInfos(); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); } @@ -746,9 +748,6 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } - // here are we copy chunk_infos to the all chunks generated from the one consumed chunk - res.chunk.getChunkInfos().append(state->chunk_infos.clone()); - if (res.is_done) state.reset(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3db935729b4..c21d230cba7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,18 +890,18 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while 
(readDataNext()) { auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + if (result.block) { return PushResult { .status = PushResult::TOO_MUCH_DATA, - .insert_block = std::move(result), + .insert_block = std::move(result.block), }; } } auto result = squashing.add({}); - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result.block), query_context); } void TCPHandler::processInsertQuery() diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5934756fb95..ab316947ff8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1298,7 +1298,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block projection_block; { ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + auto result = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + projection_block = std::move(result.block); } if (projection_block) @@ -1323,11 +1324,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); - if (projection_block) + auto squash_result = projection_squash.add({}); + if (squash_result.block) { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, std::move(squash_result.block), projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index adf6abb7298..741591b0dd4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -2,7 +2,7 @@ 3 2 -3 +2 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..0a41581025a 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated +-- Implicit insert is deduplicated even for MV without_deduplication_mv SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 9242c78fee09962abc5a28f368887e531187b5dd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 6 May 2024 21:53:22 +0200 Subject: [PATCH 070/439] work with test --- src/Interpreters/InterpreterInsertQuery.cpp | 131 +++--- src/Processors/Sinks/SinkToStorage.cpp | 4 +- src/Processors/Sinks/SinkToStorage.h | 20 - 
.../Transforms/NumberBlocksTransform.cpp | 156 ++++++ .../Transforms/NumberBlocksTransform.h | 391 +++++++-------- .../Transforms/SquashingChunksTransform.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 61 +-- src/Storages/FileLog/StorageFileLog.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 21 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 28 +- .../MergeTree/ReplicatedMergeTreeSink.h | 11 - ...02912_ingestion_mv_deduplication.reference | 3 +- .../02912_ingestion_mv_deduplication.sql | 1 + ...uplication_insert_several_blocks.reference | 148 ++++-- ...tion_mv_generates_several_blocks.reference | 324 ++++++++++--- ...cation_several_mv_into_one_table.reference | 444 ++++++++++++------ 16 files changed, 1162 insertions(+), 591 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2961d643869..0f3df3752cb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -39,6 +39,7 @@ #include #include #include +#include "base/defines.h" namespace ProfileEvents @@ -398,6 +399,9 @@ Chain InterpreterInsertQuery::buildPreSinkChain( std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) { + chassert(presink_streams > 0); + chassert(sink_streams > 0); + ThreadGroupPtr running_group; if (current_thread) running_group = current_thread->getThreadGroup(); @@ -410,8 +414,8 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + "call buildSink sink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -421,6 +425,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink presink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -454,6 +462,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -462,9 +473,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & * to avoid unnecessary squashing. 
*/ - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - Settings new_settings = select_context->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); @@ -503,6 +511,11 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); + LOG_DEBUG(getLogger("InsertQuery"), + "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { @@ -532,6 +545,56 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + pipeline.resize(1); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + pipeline.resize(presink_chains.size()); + auto actions_dag = ActionsDAG::makeConvertingActions( pipeline.getHeader().getColumnsWithTypeAndName(), query_sample_block.getColumnsWithTypeAndName(), @@ -560,54 +623,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. 
- /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - - size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); - size_t sink_streams_size = table->supportsParallelInsert() ? presink_streams_size : 1; - - auto [presink_chains, sink_chains] = buildPreAndSyncChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.resize(1); - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - pipeline.resize(presink_chains.size()); for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(sink_chains)); @@ -655,12 +676,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.appendChain(std::move(sink_chains.front())); } - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - if (shouldAddSquashingFroStorage(table)) { bool table_prefers_large_blocks = table->prefersLargeBlocks(); @@ -673,6 +688,14 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.addSource(std::move(squashing)); } + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + chain.addSource(std::make_shared(chain.getInputHeader())); + auto context_ptr = getContext(); auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); counting->setProcessListElement(context_ptr->getProcessListElement()); diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index fff4a881e3d..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -16,9 +16,7 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - fillDeduplicationTokenForChildren(chunk); - if (!lastBlockIsDuplicate()) // TODO: remove that - cur_chunk = std::move(chunk); + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 21e003c4317..c350b9f79b0 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -4,7 +4,6 @@ #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -23,25 +22,6 @@ public: protected: virtual void consume(Chunk & chunk) = 0; - virtual bool 
lastBlockIsDuplicate() const { return false; } - - void fillDeduplicationTokenForChildren(Chunk & chunk) const - { - auto token_info = chunk.getChunkInfos().get(); - if (token_info) - return; - - SipHash hash; - for (const auto & colunm: chunk.getColumns()) - { - colunm->updateHashFast(hash); - } - const auto hash_value = hash.get128(); - - chunk.getChunkInfos().add(std::make_shared( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) - )); - } private: std::vector table_locks; diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 61ff3f6bfd5..19ebf94a27a 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -1 +1,157 @@ #include + +#include + +#include +#include +#include + + +#include + + +namespace DB +{ +namespace DeduplicationToken +{ + +String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +{ + chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + + String result; + result.reserve(getTotalSize()); + + for (const auto & part : parts) + result.append(part); + + return result; +} + +void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +{ + chassert(stage == INITIAL); + addTokenPart(std::move(part)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setUserToken(const String & token) +{ + chassert(stage == INITIAL); + addTokenPart(fmt::format("user-token-{}", token)); + stage = SOURCE_BLOCK_NUMBER; +} + +void TokenInfo::setSourceBlockNumber(size_t sbn) +{ + chassert(stage == SOURCE_BLOCK_NUMBER); + addTokenPart(fmt::format(":source-number-{}", sbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setMaterializeViewID(const String & id) +{ + chassert(stage == MATERIALIZE_VIEW_ID); + addTokenPart(fmt::format(":mv-{}", id)); + stage = MATERIALIZE_VIEW_BLOCK_NUMBER; +} + +void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +{ + chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":mv-bn-{}", mvbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::reset() +{ + stage = INITIAL; + parts.clear(); +} + +void TokenInfo::addTokenPart(String part) +{ + if (!part.empty()) + parts.push_back(std::move(part)); +} + +size_t TokenInfo::getTotalSize() const +{ + size_t size = 0; + for (const auto & part : parts) + size += part.size(); + return size; +} + +void CheckTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + if (!must_be_present) + { + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + return; + } + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); +} + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + auto token_builder = chunk.getChunkInfos().get(); + chassert(token_builder); + if (token_builder->tokenInitialized()) + return; + + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); +} + +void SetUserTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + 
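// --- Illustrative sketch, not part of the patch: the token parts produced by the TokenInfo
// --- stage machine above, concatenated in the same order as getToken(). Plain strings stand in
// --- for the real TokenInfo; the user token "abc" and the view id "uuid-1234" are made-up values.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> parts;
    parts.push_back("user-token-abc");    // setUserToken("abc"):              INITIAL -> SOURCE_BLOCK_NUMBER
    parts.push_back(":source-number-0");  // setSourceBlockNumber(0):          -> MATERIALIZE_VIEW_ID
    parts.push_back(":mv-uuid-1234");     // setMaterializeViewID(...):        -> MATERIALIZE_VIEW_BLOCK_NUMBER
    parts.push_back(":mv-bn-0");          // setMaterializeViewBlockNumber(0): -> MATERIALIZE_VIEW_ID

    std::string token;
    for (const auto & part : parts)
        token += part;

    // getToken() would return: user-token-abc:source-number-0:mv-uuid-1234:mv-bn-0
    std::printf("%s\n", token.c_str());
    return 0;
}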
chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setUserToken(user_token); +} + +void SetSourceBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setSourceBlockNumber(block_number++); +} + +void SetMaterializeViewIDTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewID(mv_id); +} + +void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewBlockNumber(block_number++); +} + +void ResetTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + token_info->reset(); +} + +} +} diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6586f015d3e..46b62029c21 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -2,10 +2,9 @@ #include #include -#include -#include -#include +#include + namespace ErrorCodes { @@ -14,220 +13,6 @@ namespace ErrorCodes namespace DB { - struct SerialBlockNumberInfo : public ChunkInfoCloneable - { - SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; - explicit SerialBlockNumberInfo(size_t block_number_) - : block_number(block_number_) - { - } - - size_t block_number = 0; - }; - - - class NumberBlocksTransform : public ISimpleTransform - { - public: - explicit NumberBlocksTransform(const Block & header) - : ISimpleTransform(header, header, true) - { - } - - String getName() const override { return "NumberBlocksTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(block_number++)); - } - - private: - size_t block_number = 0; - }; - - - class DedupTokenInfo : public ChunkInfoCloneable - { - public: - DedupTokenInfo() = default; - DedupTokenInfo(const DedupTokenInfo & other) = default; - explicit DedupTokenInfo(String first_part) - { - addTokenPart(std::move(first_part)); - } - - String getToken() const - { - String result; - result.reserve(getTotalSize()); - - for (const auto & part : token_parts) - { - result.append(part); - } - - return result; - } - - bool empty() const - { - return token_parts.empty(); - } - - void addTokenPart(String part) - { - if (!part.empty()) - token_parts.push_back(std::move(part)); - } - - private: - size_t getTotalSize() const - { - size_t size = 0; - for (const auto & part : token_parts) - size += part.size(); - return size; - } - - std::vector token_parts; - }; - - class AddUserDeduplicationTokenTransform : public ISimpleTransform - { - public: - AddUserDeduplicationTokenTransform(String token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token(token_) - { - } - - String getName() const override { return "AddUserDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(token)); - } - - private: - String token; - }; - - - class CheckInsertDeduplicationTokenTransform : public ISimpleTransform - { - public: - CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) - : ISimpleTransform(header_, 
header_, true) - , debug(debug_) - , must_be_present(must_be_present_) - { - } - - String getName() const override { return "CheckInsertDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - if (!must_be_present) - return; - - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), - "{}, token: {}", - debug, token_info->getToken()); - } - - private: - String debug; - bool must_be_present = false; - }; - - - class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); - - auto block_number_info = chunk.getChunkInfos().get(); - if (!block_number_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); - - token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), - "updated with {}, result: {}", - fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); - } - }; - - class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); - - auto x = block_number++; - token_info->addTokenPart(fmt::format(":block-{}", x)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), - "updated with {}, result: {}", - fmt::format(":block-{}", x), token_info->getToken()); - } - private: - size_t block_number = 0; - }; - - class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform - { - public: - ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token_part(token_part_) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); - - token_info->addTokenPart(fmt::format("{}", token_part)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), - "updated with {}, result: {}", - token_part, token_info->getToken()); - } - - private: - String token_part; - }; - class RestoreChunkInfosTransform : public ISimpleTransform { public: @@ -248,4 +33,176 @@ namespace DB Chunk::ChunkInfoCollection chunk_infos; 
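// --- Illustrative sketch, not part of the patch: the fallback used by SetInitialTokenTransform
// --- and the sinks when no user token is set - the initial token is derived from a hash of the
// --- data itself, so re-inserting identical data yields an identical token. std::hash over a
// --- serialized string stands in here for the 128-bit SipHash of the real columns.
#include <cstdio>
#include <functional>
#include <string>

static std::string initialTokenFromData(const std::string & serialized_columns)
{
    const std::size_t h = std::hash<std::string>{}(serialized_columns);
    return std::to_string(h);
}

int main()
{
    // Same data twice -> same token -> the second insert can be deduplicated.
    std::printf("%s\n", initialTokenFromData("a=[1,2,3];b=[x,y,z]").c_str());
    std::printf("%s\n", initialTokenFromData("a=[1,2,3];b=[x,y,z]").c_str());
    return 0;
}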
}; + +namespace DeduplicationToken +{ + class TokenInfo : public ChunkInfoCloneable + { + public: + TokenInfo() = default; + TokenInfo(const TokenInfo & other) = default; + + String getToken(bool enable_assert = true) const; + + bool empty() const { return parts.empty(); } + bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } + + void setInitialToken(String part); + void setUserToken(const String & token); + void setSourceBlockNumber(size_t sbn); + void setMaterializeViewID(const String & id); + void setMaterializeViewBlockNumber(size_t mvbn); + void reset(); + + private: + void addTokenPart(String part); + size_t getTotalSize() const; + + enum BuildingStage + { + INITIAL, + SOURCE_BLOCK_NUMBER, + MATERIALIZE_VIEW_ID, + MATERIALIZE_VIEW_BLOCK_NUMBER, + }; + + BuildingStage stage = INITIAL; + std::vector parts; + }; + + + class CheckTokenTransform : public ISimpleTransform + { + public: + CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String debug; + bool must_be_present = false; + }; + + + class AddTokenInfoTransform : public ISimpleTransform + { + public: + explicit AddTokenInfoTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared()); + } + }; + + + class SetInitialTokenTransform : public ISimpleTransform + { + public: + explicit SetInitialTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + class ResetTokenTransform : public ISimpleTransform + { + public: + explicit ResetTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + + class SetUserTokenTransform : public ISimpleTransform + { + public: + SetUserTokenTransform(String user_token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , user_token(std::move(user_token_)) + { + } + + String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String user_token; + }; + + + class SetSourceBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetSourceBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + + + class SetMaterializeViewIDTransform : public ISimpleTransform + { + public: + SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , mv_id(std::move(mv_id_)) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + + void transform(Chunk & chunk) override; + + private: + String 
mv_id; + }; + + + class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + +} } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7464cb79ba6..1a29b8d8a2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -79,7 +79,7 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}", chunk.getNumRows()); + "transform {}, finished {}", chunk.getNumRows(), finished); if (!finished) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ccecfcf3333..0c1893e0f37 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -108,7 +108,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -119,6 +119,7 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; + bool disable_deduplication_for_children; struct State { @@ -219,6 +220,11 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + if (disable_deduplication_for_children) + { + insert_context->setSetting("insert_deduplicate", Field{false}); + } + // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -330,16 +336,6 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); - - if (!disable_deduplication_for_children) - { - String addition_part = view_id.hasUUID() ? toString(view_id.uuid) : view_id.getFullNameNotQuoted(); - out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); - } - - out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -351,7 +347,7 @@ std::optional generateViewChain( table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); } - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,23 +390,15 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - - // if (!disable_deduplication_for_children) - // { - // // out.addSource(std::make_shared(out.getInputHeader())); - // // out.addSource(std::make_shared(out.getInputHeader())); - - // out.addSource(std::make_shared(out.getInputHeader())); - // } + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); + storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -451,8 +439,6 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - /// If the "root" table deduplicates blocks, there are no need to make deduplication for children - /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks bool disable_deduplication_for_children = false; if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); @@ -563,6 +549,10 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -578,7 +568,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) { const auto & context = views_data.context; @@ -625,9 +615,18 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + + if (!disable_deduplication_for_children) + { + String materialize_view_id = view.table_id.hasUUID() ? 
toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + else + { + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -720,17 +719,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_) + std::shared_ptr views_data_, + bool disable_deduplication_for_children_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) + , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 6ca4ec6e079..b86845d48e0 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,13 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + new_context, + false, + true, + true, + false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2d29f87c556..4b0fa94e183 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -90,16 +90,20 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if (storage.getDeduplicationLog()) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info->tokenInitialized()) { block_dedup_token = token_info->getToken(); @@ -109,9 +113,6 @@ void MergeTreeSink::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); - LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -141,10 +142,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { 
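// --- Illustrative sketch, not part of the patch: the per-chunk decision made by the sinks above
// --- when choosing a deduplication token. Booleans and strings stand in for the real TokenInfo
// --- and Settings objects; the values in main() are made up.
#include <cstdio>
#include <stdexcept>
#include <string>

static std::string chooseDedupToken(
    bool has_token_info, bool token_initialized, bool user_token_in_settings,
    const std::string & prepared_token, const std::string & part_hash)
{
    if (!has_token_info)
        throw std::logic_error("TokenInfo is expected for the consumed chunk");
    if (!token_initialized && user_token_in_settings)
        throw std::logic_error("TokenInfo has to be initialized with the user token");
    if (token_initialized)
        return prepared_token;  // token assembled upstream by the transforms
    return part_hash;           // fallback: hash of the written part becomes the initial token
}

int main()
{
    std::printf("%s\n", chooseDedupToken(true, true, true, "user-token-abc:source-number-0", "hash123").c_str());
    std::printf("%s\n", chooseDedupToken(true, false, false, "", "hash123").c_str());
    return 0;
}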
chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e855bb7d969..b03f3f88611 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -294,17 +294,21 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if constexpr (!async_insert) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (token_info->tokenInitialized()) { /// multiple blocks can be inserted within the same insert query /// an ordinal number is added to dedup token to generate a distinctive block id for each block @@ -316,8 +320,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -386,10 +388,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -444,8 +446,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) /// value for `last_block_is_duplicate`, which is possible only after the part is committed. /// Othervide we can delay commit. /// TODO: we can also delay commit if there is no MVs. 
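// --- Illustrative sketch, not part of the patch: why an ordinal is appended to the dedup token
// --- (see the comment above about "an ordinal number is added to dedup token") - every block of
// --- a multi-block INSERT needs its own block id. The "_" separator and the values are illustrative.
#include <cstddef>
#include <cstdio>
#include <string>

static std::string blockDedupToken(const std::string & insert_token, std::size_t block_ordinal)
{
    return insert_token + "_" + std::to_string(block_ordinal);
}

int main()
{
    // Two blocks produced by one INSERT ... SETTINGS insert_deduplication_token = 'abc'
    std::printf("%s\n", blockDedupToken("abc", 0).c_str());  // abc_0
    std::printf("%s\n", blockDedupToken("abc", 1).c_str());  // abc_1
    return 0;
}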
- if (!settings.deduplicate_blocks_in_dependent_materialized_views) - finishDelayedChunk(zookeeper); + // if (!settings.deduplicate_blocks_in_dependent_materialized_views) + // finishDelayedChunk(zookeeper); ++num_blocks_processed; } @@ -456,8 +458,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; - last_block_is_duplicate = false; - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -470,8 +470,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; - last_block_is_duplicate = last_block_is_duplicate || deduplicated; - /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index e460804d7f1..7d025361717 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -59,16 +59,6 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); - /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } - struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -126,7 +116,6 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; - bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 335b55f05c8..ae82b9c0463 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,7 +10,8 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 +2023-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index f206f0d7775..06fe156500d 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -56,6 +56,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + Now it is fixed. 
*/ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 9b4738ce805..641735d1bb6 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -555,47 +601,93 @@ OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index 4411bdecea8..06f30793670 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 13: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -197,9 +243,16 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -250,9 +303,16 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -303,9 +363,16 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -356,9 +423,16 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -527,47 +601,93 @@ OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -603,9 +723,16 @@ Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -656,9 +783,16 @@ Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -709,9 +843,16 @@ Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -762,9 +903,16 @@ Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1055,9 +1203,16 @@ Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1108,9 +1263,16 @@ Test case 84: 
insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1161,9 +1323,16 @@ Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1214,9 +1383,16 @@ Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1507,9 +1683,16 @@ Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1560,9 +1743,16 @@ Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1613,9 +1803,16 @@ Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1666,9 +1863,16 @@ Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index a56f7deb744..4d517948a25 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ 
b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -88,36 +88,70 @@ table_dst count 32 OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -143,19 +177,25 @@ OK Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -181,19 +221,25 @@ OK Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False 
-table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -219,19 +265,25 @@ OK Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -257,19 +309,25 @@ OK Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -382,36 +440,70 @@ table_dst count 32 OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 
-EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -437,19 +529,25 @@ OK Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -475,19 +573,25 @@ OK Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -513,19 +617,25 @@ OK Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 
-EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -551,19 +661,25 @@ OK Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -765,19 +881,25 @@ OK Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -803,19 +925,25 @@ OK Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 86: insert_method=InsertValues engine=MergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -841,19 +969,25 @@ OK Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -879,19 +1013,25 @@ OK Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1093,19 +1233,25 @@ OK Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1131,19 +1277,25 @@ OK Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 
1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1169,19 +1321,25 @@ OK Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1207,19 +1365,25 @@ OK Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 From 55ff6446b5035588eb6985e6fa2291ca444f0a00 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 7 May 2024 12:20:31 +0200 Subject: [PATCH 071/439] adjust rebase --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +++++-- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..c55bcd08573 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -191,7 +191,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) { std::vector to_written = squash(std::move(chunk_)); @@ -217,7 +217,10 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// 
If there is any data left, write it. if (!chunks.empty()) - consume({}); + { + Chunk empty; + consume(empty); + } } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..be425208357 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -34,7 +34,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; From 7a8f6b120699a9c4baf7a465ed21fa0aa66ddda5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 16:26:15 +0200 Subject: [PATCH 072/439] fix window view and other tests --- .../Sources/SourceFromSingleChunk.cpp | 4 +- .../Transforms/NumberBlocksTransform.cpp | 22 ++++++++-- .../Transforms/NumberBlocksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 8 ++-- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 16 ++++++-- src/Storages/WindowView/StorageWindowView.cpp | 41 +++++++++++++++---- src/Storages/WindowView/StorageWindowView.h | 2 +- .../03035_max_insert_threads_support.sh | 2 +- 11 files changed, 78 insertions(+), 29 deletions(-) diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index fb888c104c4..9abe0504d10 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,7 +5,9 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) +{ +} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 19ebf94a27a..387d1ceb8e0 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -12,6 +12,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DeduplicationToken { @@ -101,9 +107,17 @@ void CheckTokenTransform::transform(Chunk & chunk) void SetInitialTokenTransform::transform(Chunk & chunk) { - auto token_builder = chunk.getChunkInfos().get(); - chassert(token_builder); - if (token_builder->tokenInitialized()) + auto token_info = chunk.getChunkInfos().get(); + + LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); + + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + + chassert(token_info); + if (!token_info || token_info->tokenInitialized()) return; SipHash hash; @@ -111,7 +125,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) colunm->updateHashFast(hash); const auto hash_value = hash.get128(); - token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git 
a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 46b62029c21..6978fe5e6b6 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -166,7 +166,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 0c1893e0f37..2e6baea7c26 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,8 +24,8 @@ #include #include #include -#include "Processors/Chunk.h" -#include "Processors/Transforms/NumberBlocksTransform.h" +#include +#include #include #include @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); + live_view.writeBlock(getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); + window_view, getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c3aacfd67d3..f6008347425 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -330,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +363,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(block)); + pipes.emplace_back(std::make_shared(header, chunk.clone())); auto creator = [&](const StorageID & blocks_id_global) { diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 91daac32c7b..fce5bad6240 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & block, ContextPtr context); + void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); void refresh(); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4b0fa94e183..c252d95a5e9 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,12 +95,12 @@ void MergeTreeSink::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && 
!context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); if (token_info->tokenInitialized()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b03f3f88611..41fdb86f3bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -299,12 +299,12 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); @@ -1174,8 +1174,16 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - auto zookeeper = storage.getZooKeeper(); - finishDelayedChunk(std::make_shared(zookeeper)); + const auto & settings = context->getSettingsRef(); + + ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + storage.getZooKeeper(), + "ReplicatedMergeTreeSink::onFinish", + log); + + finishDelayedChunk(zookeeper); } template diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e0f3b437af7..b81ca34c427 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1415,22 +1416,25 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & block, ContextPtr local_context) + StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) + if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); - const auto & window_column = block.getByName(window_view.timestamp_column_name); + const auto & window_column = header.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); + auto chunk_infos = chunk.getChunkInfos(); + chunk.setChunkInfos({}); + + Pipe pipe(std::make_shared(header.cloneEmpty(), 
std::move(chunk))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1475,10 +1479,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header) + pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header, std::make_shared(filter_expression), + header_, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1533,6 +1537,17 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(std::move(chunk_infos), stream_header); + }); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1546,7 +1561,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1572,6 +1587,11 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + }); + auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1588,9 +1608,14 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); + builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Before out", true, stream_header); + }); + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f79867df424..56a21279b86 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh 
b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index 1e6bfb414d8..cedb651a430 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o MaterializingTransform | wc -l +" | grep -o StorageFileSink | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 4fa59ca49dc7536dbe5b10cbe1f56cd411415aa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 17:42:18 +0200 Subject: [PATCH 073/439] adjust style --- src/Processors/Transforms/NumberBlocksTransform.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6978fe5e6b6..610c219dfa2 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -6,11 +6,6 @@ #include -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { class RestoreChunkInfosTransform : public ISimpleTransform From ae124bf0b36958be0f1ef492272cd85664e50eb7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 May 2024 17:07:31 +0200 Subject: [PATCH 074/439] fix tests for liveview windowview --- .../Transforms/NumberBlocksTransform.cpp | 64 +++++++++++-------- .../Transforms/NumberBlocksTransform.h | 37 ++++++----- .../Transforms/buildPushingToViewsChain.cpp | 8 +-- src/Storages/LiveView/StorageLiveView.cpp | 20 +++++- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 38 ++++++++--- src/Storages/WindowView/StorageWindowView.h | 2 +- ...view_and_deduplication_zookeeper.reference | 4 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 9 files changed, 116 insertions(+), 61 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 387d1ceb8e0..11054f652ff 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -23,7 +23,7 @@ namespace DeduplicationToken String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const { - chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + chassert(stage == VIEW_ID || !enable_assert); String result; result.reserve(getTotalSize()); @@ -38,7 +38,7 @@ void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } void TokenInfo::setUserToken(const String & token) @@ -52,21 +52,21 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); addTokenPart(fmt::format(":source-number-{}", sbn)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } -void TokenInfo::setMaterializeViewID(const String & id) +void TokenInfo::setViewID(const String & id) { - chassert(stage == MATERIALIZE_VIEW_ID); - addTokenPart(fmt::format(":mv-{}", id)); - stage = MATERIALIZE_VIEW_BLOCK_NUMBER; + chassert(stage == VIEW_ID); + addTokenPart(fmt::format(":view-id-{}", id)); + stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +void 
TokenInfo::setViewBlockNumber(size_t mvbn) { - chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":mv-bn-{}", mvbn)); - stage = MATERIALIZE_VIEW_ID; + chassert(stage == VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":view-block-{}", mvbn)); + stage = VIEW_ID; } void TokenInfo::reset() @@ -116,8 +116,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - chassert(token_info); - if (!token_info || token_info->tokenInitialized()) + if (token_info->tokenInitialized()) return; SipHash hash; @@ -131,39 +130,52 @@ void SetInitialTokenTransform::transform(Chunk & chunk) void SetUserTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); token_info->setUserToken(user_token); } void SetSourceBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); token_info->setSourceBlockNumber(block_number++); } -void SetMaterializeViewIDTransform::transform(Chunk & chunk) +void SetViewIDTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewID(mv_id); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewIDTransform"); + token_info->setViewID(view_id); } -void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +void SetViewBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewBlockNumber(block_number++); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); + token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ResetTokenTransform"); + + LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 610c219dfa2..b4f61eb887c 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -13,14 +13,21 @@ namespace DB public: RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) - , chunk_infos(chunk_infos_) + , chunk_infos(std::move(chunk_infos_)) { + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", + chunk_infos.size(), chunk_infos.debug()); } String getName() const override { return "RestoreChunkInfosTransform"; } void transform(Chunk & chunk) override { + 
LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -45,8 +52,8 @@ namespace DeduplicationToken void setInitialToken(String part); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); - void setMaterializeViewID(const String & id); - void setMaterializeViewBlockNumber(size_t mvbn); + void setViewID(const String & id); + void setViewBlockNumber(size_t mvbn); void reset(); private: @@ -57,8 +64,8 @@ namespace DeduplicationToken { INITIAL, SOURCE_BLOCK_NUMBER, - MATERIALIZE_VIEW_ID, - MATERIALIZE_VIEW_BLOCK_NUMBER, + VIEW_ID, + VIEW_BLOCK_NUMBER, }; BuildingStage stage = INITIAL; @@ -71,7 +78,7 @@ namespace DeduplicationToken public: CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) : ISimpleTransform(header_, header_, true) - , debug(debug_) + , debug(std::move(debug_)) , must_be_present(must_be_present_) { } @@ -165,38 +172,38 @@ namespace DeduplicationToken }; - class SetMaterializeViewIDTransform : public ISimpleTransform + class SetViewIDTransform : public ISimpleTransform { public: - SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + SetViewIDTransform(String view_id_, const Block & header_) : ISimpleTransform(header_, header_, true) - , mv_id(std::move(mv_id_)) + , view_id(std::move(view_id_)) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } void transform(Chunk & chunk) override; private: - String mv_id; + String view_id; }; - class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + class SetViewBlockNumberTransform : public ISimpleTransform { public: - explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + explicit SetViewBlockNumberTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 2e6baea7c26..47ac1f3baed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -620,8 +620,8 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat if (!disable_deduplication_for_children) { String materialize_view_id = view.table_id.hasUUID() ? 
toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); } else { @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader(), chunk, context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader(), chunk, context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f6008347425..b9d29a90f56 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,6 +27,7 @@ limitations under the License. */ #include #include #include +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include @@ -330,7 +331,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) +void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +364,7 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(header, chunk.clone())); + pipes.emplace_back(std::make_shared(block)); auto creator = [&](const StorageID & blocks_id_global) { @@ -407,6 +408,21 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr builder = interpreter.buildQueryPipeline(); } + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(chunk_infos.clone(), cur_header); + }); + + String live_view_id = live_view.getStorageID().hasUUID() ? 
toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(live_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fce5bad6240..12d8e898347 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); + void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); void refresh(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b81ca34c427..738de4b07ed 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1416,25 +1416,27 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) + StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", + block.rows(), + chunk_infos.size(), chunk_infos.debug(), + window_view.timestamp_column_name); + window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) + if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); - const auto & window_column = header.getByName(window_view.timestamp_column_name); + const auto & window_column = block.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - auto chunk_infos = chunk.getChunkInfos(); - chunk.setChunkInfos({}); - - Pipe pipe(std::make_shared(header.cloneEmpty(), std::move(chunk))); + Pipe pipe(std::make_shared(block)); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1465,6 +1467,9 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", + lateness_bound, window_view.is_proctime); + if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { auto filter_function = makeASTFunction( @@ -1540,7 +1545,18 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared(std::move(chunk_infos), stream_header); + // Can't move chunk_infos here, that function could be called several times + return std::make_shared(chunk_infos.clone(), stream_header); + }); + + String window_view_id = 
window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(window_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); }); builder.addSimpleTransform([&](const Block & stream_header) @@ -1548,6 +1564,7 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1561,7 +1578,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1569,6 +1586,9 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); + + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", + block_max_timestamp); } UInt32 lateness_upper_bound = 0; diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 56a21279b86..14ac65091d3 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 741591b0dd4..9c9281dc7e4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,8 +1,8 @@ 2 3 -2 -2 +3 +3 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 0a41581025a..51e6a513608 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert is deduplicated even for MV without_deduplication_mv +-- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 7fe4e675707c1e27edf2a06f3779768a483e6c21 Mon Sep 17 00:00:00 2001 From: Sema 
Checherinda Date: Tue, 21 May 2024 19:02:50 +0200 Subject: [PATCH 075/439] accept test 02457_insert_select_progress_http --- src/Interpreters/InterpreterInsertQuery.cpp | 56 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 37 ++++++------ src/Interpreters/SquashingTransform.h | 7 +-- .../Transforms/SquashingChunksTransform.cpp | 13 +++-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 --- 5 files changed, 54 insertions(+), 66 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 0f3df3752cb..339f68258dc 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -545,6 +545,34 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -595,34 +623,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.resize(presink_chains.size()); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. 
- pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index cf4f2060414..8a902add9a5 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,6 @@ #include +#include namespace DB { @@ -16,23 +17,6 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b } SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. if (!input_block) @@ -66,7 +50,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input return SquashResult{std::move(to_return), true}; } - append(std::move(input_block)); + append(std::move(input_block)); if (isEnoughSize(accumulated_block)) { Block to_return; @@ -79,8 +63,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input } -template -void SquashingTransform::append(ReferenceType input_block) +void SquashingTransform::append(Block && input_block) { if (!accumulated_block) { @@ -88,6 +71,11 @@ void SquashingTransform::append(ReferenceType input_block) return; } + LOG_DEBUG(getLogger("SquashingTransform"), + "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + input_block.rows(), input_block.bytes(), input_block.columns(), + accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -96,6 +84,15 @@ void SquashingTransform::append(ReferenceType input_block) { const auto source_column = input_block.getByPosition(i).column; + const auto acc_column = accumulated_block.getByPosition(i).column; + + LOG_DEBUG(getLogger("SquashingTransform"), + "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + i, source_column->getName(), + acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index f1eba537338..fff55a760db 100644 
--- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -34,7 +34,6 @@ public: * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. */ SquashResult add(Block && block); - SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -42,11 +41,7 @@ private: Block accumulated_block; - template - SquashResult addImpl(ReferenceType block); - - template - void append(ReferenceType block); + void append(Block && block); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 1a29b8d8a2d..ea0d63a2ed7 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -21,7 +21,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) "onConsume {}", chunk.getNumRows()); if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -33,7 +33,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } @@ -79,12 +79,15 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}, finished {}", chunk.getNumRows(), finished); + "transform rows {}, size {}, columns {}, infos: {}/{}, finished {}", + chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + finished); if (!finished) { if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -96,7 +99,7 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } else diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 41fdb86f3bd..11c64c97cb7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -442,13 +442,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); - /// If deduplicated data should not be inserted into MV, we need to set proper - /// value for `last_block_is_duplicate`, which is possible only after the part is committed. - /// Othervide we can delay commit. - /// TODO: we can also delay commit if there is no MVs. 
- // if (!settings.deduplicate_blocks_in_dependent_materialized_views) - // finishDelayedChunk(zookeeper); - ++num_blocks_processed; } From 72787fc0fc53c85a6f58808874c7b167554f3ebb Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 23 May 2024 20:19:38 +0200 Subject: [PATCH 076/439] Fix bad conflict resolution --- src/DataTypes/Serializations/ISerialization.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 2714dfbee1f..89e2079490e 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -422,8 +422,6 @@ public: static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); - static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state); - static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path); static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state); static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path); From bc164f842cf3eda28dd3a3d156d5c8c629a51b24 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 May 2024 12:31:36 +0000 Subject: [PATCH 077/439] Fix tests --- src/DataTypes/Serializations/SerializationVariant.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index a202cdb71ce..8cdd312a707 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -208,6 +208,10 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian auto * variant_state = checkAndGetState(state); + /// Don't write anything if column is empty. + if (limit == 0) + return; + /// Write number of rows in this granule in compact mode. 
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) writeVarUInt(UInt64(limit), *discriminators_stream); From f7238356d1530925032d923dfbe3467a4e9c46be Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 11:44:45 +0000 Subject: [PATCH 078/439] Commit from private --- programs/disks/CMakeLists.txt | 13 +- programs/disks/CommandChangeDirectory.cpp | 43 +++ programs/disks/CommandCopy.cpp | 61 +-- programs/disks/CommandLink.cpp | 45 +-- programs/disks/CommandList.cpp | 98 +++-- programs/disks/CommandListDisks.cpp | 63 +-- programs/disks/CommandMkDir.cpp | 47 +-- programs/disks/CommandMove.cpp | 47 +-- programs/disks/CommandRead.cpp | 58 +-- programs/disks/CommandRemove.cpp | 43 +-- programs/disks/CommandSwitchDisk.cpp | 47 +++ programs/disks/CommandWrite.cpp | 71 ++-- programs/disks/DisksApp.cpp | 448 +++++++++++++++------- programs/disks/DisksApp.h | 91 +++-- programs/disks/DisksClient.cpp | 42 ++ programs/disks/DisksClient.h | 316 +++++++++++++++ programs/disks/ICommand.cpp | 68 ++-- programs/disks/ICommand.h | 115 ++++-- programs/disks/ICommand_fwd.h | 10 + src/Disks/DiskSelector.cpp | 20 +- 20 files changed, 1166 insertions(+), 580 deletions(-) create mode 100644 programs/disks/CommandChangeDirectory.cpp create mode 100644 programs/disks/CommandSwitchDisk.cpp create mode 100644 programs/disks/DisksClient.cpp create mode 100644 programs/disks/DisksClient.h create mode 100644 programs/disks/ICommand_fwd.h diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index f0949fcfceb..c5b30d61706 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,18 +1,21 @@ set (CLICKHOUSE_DISKS_SOURCES - DisksApp.cpp ICommand.cpp + DisksClient.cpp + DisksApp.cpp CommandCopy.cpp - CommandLink.cpp - CommandList.cpp CommandListDisks.cpp + CommandList.cpp + CommandLink.cpp + CommandChangeDirectory.cpp CommandMkDir.cpp CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp) + CommandWrite.cpp + CommandSwitchDisk.cpp) if (CLICKHOUSE_CLOUD) - set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) + set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) endif () set (CLICKHOUSE_DISKS_LINK diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp new file mode 100644 index 00000000000..9932d918099 --- /dev/null +++ b/programs/disks/CommandChangeDirectory.cpp @@ -0,0 +1,43 @@ +#include +#include "Common/Exception.h" +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +class CommandChangeDirectory final : public ICommand +{ +public: + explicit CommandChangeDirectory() : ICommand() + { + command_name = "cd"; + description = "Change directory"; + options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)")( + "disk", po::value(), "A disk where the path is changed"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + DiskWithPath & disk = getDiskWithPath(client, options, "disk"); + // std::cerr << "Disk name: " << disk.getDisk()->getName() << std::endl; + String path = getValueFromCommandLineOptionsThrow(options, "path"); + // std::cerr << "Disk path: " << path << std::endl; + disk.setPath(path); + } +}; + +CommandPtr makeCommandChangeDirectory() +{ + return std::make_unique(); +} 
+ +} diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..e853e054f97 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,68 +1,45 @@ -#include "ICommand.h" #include #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... "; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + options_description.add_options()("disk-from", po::value(), "disk from which we copy")( + "disk-to", po::value(), "disk to which we copy")( + "path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + disk_from.getDisk()->copyDirectoryContent( + path_from, disk_to.getDisk(), path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); } }; -} -std::unique_ptr makeCommandCopy() +CommandPtr makeCommandCopy() { return std::make_unique(); } +} diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index dbaa3162f82..8b467891d18 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,12 +1,12 @@ -#include "ICommand.h" #include +#include 
"ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandLink final : public ICommand @@ -16,42 +16,27 @@ public: { command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; - usage = "link [OPTION]... "; + options_description.add_options()( + "path-to", po::value(), "the path from which a hard link will be created (mandatory, positional)")( + "path-from", po::value(), "the path where a hard link will be created (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + const String & path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk->createHardLink(relative_path_from, relative_path_to); + disk.getDisk()->createHardLink(path_from, path_to); } }; -} -std::unique_ptr makeCommandLink() +CommandPtr makeCommandLink() { return std::make_unique(); } + +} diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 7213802ea86..26a576abc7d 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,98 +1,92 @@ -#include "ICommand.h" #include #include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandList final : public ICommand { public: - CommandList() + explicit CommandList() : ICommand() { command_name = "list"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "List files at path[s]"; - usage = "list [OPTION]... 
..."; - command_option_description->add_options() - ("recursive", "recursively list all directories"); + options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - bool recursive = config.getBool("recursive", false); + bool recursive = options.count("recursive"); + bool show_hidden = options.count("all"); + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", ""); if (recursive) - listRecursive(disk, relative_path); + listRecursive(disk, disk.getAbsolutePath(path), show_hidden); else - list(disk, relative_path); + list(disk, path, show_hidden); } private: - static void list(const DiskPtr & disk, const std::string & relative_path) + static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(path); + std::vector selected_and_sorted_file_names{}; for (const auto & file_name : file_names) - std::cout << file_name << '\n'; + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } } - static void listRecursive(const DiskPtr & disk, const std::string & relative_path) + static void listRecursive(const DiskWithPath & disk, const std::string & absolute_path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(absolute_path); + std::vector selected_and_sorted_file_names{}; - std::cout << relative_path << ":\n"; + std::cout << absolute_path << ":\n"; if (!file_names.empty()) { for (const auto & file_name : file_names) - std::cout << file_name << '\n'; - std::cout << "\n"; + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); } + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } + std::cout << "\n"; + for (const auto & file_name : file_names) { - auto path = relative_path.empty() ? 
file_name : (relative_path + "/" + file_name); - if (disk->isDirectory(path)) - listRecursive(disk, path); + auto path = absolute_path + "/" + file_name; + if (disk.isDirectory(path)) + if (show_hidden || (!file_name.starts_with('.'))) + listRecursive(disk, path, show_hidden); } } }; -} -std::unique_ptr makeCommandList() +CommandPtr makeCommandList() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 79da021fd00..16779b0fdae 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,68 +1,45 @@ -#include "ICommand.h" +#include #include +#include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandListDisks final : public ICommand { public: - CommandListDisks() + explicit CommandListDisks() : ICommand() { command_name = "list-disks"; - description = "List disks names"; - usage = "list-disks [OPTION]"; + description = "Lists all available disks"; } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr &, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions &, DisksClient & client) override { - if (!command_arguments.empty()) + std::vector sorted_and_selected{}; + for (const auto & disk_name : client.getAllDiskNames()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath("")); } - - constexpr auto config_prefix = "storage_configuration.disks"; - constexpr auto default_disk_name = "default"; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - bool has_default_disk = false; - - /// For the output to be ordered - std::set disks; - - for (const auto & disk_name : keys) + std::sort(sorted_and_selected.begin(), sorted_and_selected.end()); + for (const auto & disk_name : sorted_and_selected) { - if (disk_name == default_disk_name) - has_default_disk = true; - disks.insert(disk_name); + std::cout << disk_name << "\n"; } - - if (!has_default_disk) - disks.insert(default_disk_name); - - for (const auto & disk : disks) - std::cout << disk << '\n'; } -}; -} -std::unique_ptr makeCommandListDisks() +private: +}; + +CommandPtr makeCommandListDisks() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 6d33bdec498..23312435d4e 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -8,7 +8,7 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandMkDir final : public ICommand @@ -17,50 +17,29 @@ public: CommandMkDir() { command_name = "mkdir"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Create a directory"; - usage = "mkdir [OPTION]... 
"; - command_option_description->add_options() - ("recursive", "recursively create directories"); + options_description.add_options()("recursive", "recursively create directories")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } + bool recursive = options.count("recursive"); + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - bool recursive = config.getBool("recursive", false); + String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); if (recursive) - disk->createDirectories(relative_path); + disk.getDisk()->createDirectories(path); else - disk->createDirectory(relative_path); + disk.getDisk()->createDirectory(path); } }; -} -std::unique_ptr makeCommandMkDir() +CommandPtr makeCommandMkDir() { return std::make_unique(); } + +} diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..25620de448e 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,12 +1,12 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandMove final : public ICommand @@ -16,44 +16,29 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... 
"; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + auto disk = client.getCurrentDiskWithPath(); - String disk_name = config.getString("disk", "default"); + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if (disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); + if (disk.getDisk()->isFile(path_from)) + disk.getDisk()->moveFile(path_from, path_from); else - disk->moveDirectory(relative_path_from, relative_path_to); + disk.getDisk()->moveDirectory(path_from, path_from); } }; -} -std::unique_ptr makeCommandMove() +CommandPtr makeCommandMove() { return std::make_unique(); } + +} diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 0f3ac7ab98c..82ff90b6e02 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,16 +1,16 @@ -#include "ICommand.h" -#include #include #include #include +#include #include +#include "ICommand.h" namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandRead final : public ICommand @@ -19,60 +19,44 @@ public: CommandRead() { command_name = "read"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); + // command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Read a file from `FROM_PATH` to `TO_PATH`"; - usage = "read [OPTION]... 
[]";
-        command_option_description->add_options()
-            ("output", po::value(), "file to which we are reading, defaults to `stdout`");
+        options_description.add_options()(
+            "path-from", po::value(), "file from which we are reading (mandatory, positional)")(
+            "path-to", po::value(), "file to which we are writing, defaults to `stdout`");
+        positional_options_description.add("path-from", 1);
     }
 
-    void processOptions(
-        Poco::Util::LayeredConfiguration & config,
-        po::variables_map & options) const override
+    void executeImpl(const CommandLineOptions & options, DisksClient & client) override
     {
-        if (options.count("output"))
-            config.setString("output", options["output"].as());
-    }
-
-    void execute(
-        const std::vector & command_arguments,
-        std::shared_ptr & disk_selector,
-        Poco::Util::LayeredConfiguration & config) override
-    {
-        if (command_arguments.size() != 1)
+        auto disk = client.getCurrentDiskWithPath();
+        String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from"));
+        std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to");
+        if (path_to.has_value())
         {
-            printHelpMessage();
-            throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
+            path_to = std::optional{disk.getRelativeFromRoot(path_to.value())};
         }
 
-        String disk_name = config.getString("disk", "default");
-
-        DiskPtr disk = disk_selector->get(disk_name);
-
-        String relative_path = validatePathAndGetAsRelative(command_arguments[0]);
-
-        String path_output = config.getString("output", "");
-
-        if (!path_output.empty())
+        auto in = disk.getDisk()->readFile(path_from);
+        if (path_to.has_value())
         {
-            String relative_path_output = validatePathAndGetAsRelative(path_output);
+            String relative_path_to = path_to.value();
 
-            auto in = disk->readFile(relative_path);
-            auto out = disk->writeFile(relative_path_output);
+            auto out = disk.getDisk()->writeFile(relative_path_to);
             copyData(*in, *out);
             out->finalize();
         }
         else
         {
-            auto in = disk->readFile(relative_path);
             std::unique_ptr out = std::make_unique(STDOUT_FILENO);
             copyData(*in, *out);
         }
     }
 };
-}
 
-std::unique_ptr makeCommandRead()
+CommandPtr makeCommandRead()
 {
     return std::make_unique();
 }
+
+}
diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp
index 0c631eacff3..0344a09d156 100644
--- a/programs/disks/CommandRemove.cpp
+++ b/programs/disks/CommandRemove.cpp
@@ -1,12 +1,12 @@
-#include "ICommand.h"
 #include 
+#include "ICommand.h"
 
 namespace DB
 {
 
 namespace ErrorCodes
 {
-    extern const int BAD_ARGUMENTS;
+extern const int BAD_ARGUMENTS;
 }
 
 class CommandRemove final : public ICommand
@@ -15,40 +15,23 @@ public:
     CommandRemove()
     {
         command_name = "remove";
-        description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'";
-        usage = "remove [OPTION]... ";
+        description = "Remove file or directory with all children. 
Throws exception if file doesn't exists.\nPath should be in format './' " + "or './path' or 'path'"; + options_description.add_options()("path", po::value(), "path from which we copy (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - disk->removeRecursive(relative_path); + auto disk = client.getCurrentDiskWithPath(); + const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); + disk.getDisk()->removeRecursive(path); } }; -} -std::unique_ptr makeCommandRemove() +CommandPtr makeCommandRemove() { return std::make_unique(); } + +} diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp new file mode 100644 index 00000000000..6c1fbaa0623 --- /dev/null +++ b/programs/disks/CommandSwitchDisk.cpp @@ -0,0 +1,47 @@ +#include +#include +#include "Common/Exception.h" +#include +#include "DisksApp.h" +#include "ICommand.h" + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +class CommandSwitchDisk final : public ICommand +{ +public: + explicit CommandSwitchDisk() : ICommand() + { + command_name = "switch-disk"; + // options_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); + description = "Change disk"; + // options_description->add_options()("recursive", "recursively list all directories"); + options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( + "path", po::value(), "the path to switch on the disk"); + positional_options_description.add("disk", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + String disk = getValueFromCommandLineOptions(options, "disk"); + std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); + + if (!client.switchToDisk(disk, path)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Unable to switch to disk: {}, path: {}", disk, path.has_value() ? 
path.value() : "NO PATH"); + } + } +}; + +CommandPtr makeCommandSwitchDisk() +{ + return std::make_unique(); +} +} diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 7ded37e067a..42999572443 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,17 +1,17 @@ -#include "ICommand.h" #include +#include "ICommand.h" -#include #include #include #include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } class CommandWrite final : public ICommand @@ -20,60 +20,47 @@ public: CommandWrite() { command_name = "write"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); + // command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Write a file from `FROM_PATH` to `TO_PATH`"; - usage = "write [OPTION]... [] "; - command_option_description->add_options() - ("input", po::value(), "file from which we are reading, defaults to `stdin`"); + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin`")( + "path-to", po::value(), "file to which we are writing (mandatory, positional)"); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("input")) - config.setString("input", options["input"].as()); - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); + if (path_from.has_value()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + path_from = std::optional{disk.getRelativeFromRoot(path_from.value())}; } - String disk_name = config.getString("disk", "default"); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - String path_input = config.getString("input", ""); - std::unique_ptr in; - if (path_input.empty()) + auto in = [&]() -> std::unique_ptr { - in = std::make_unique(STDIN_FILENO); - } - else - { - String relative_path_input = validatePathAndGetAsRelative(path_input); - in = disk->readFile(relative_path_input); - } + if (!path_from.has_value()) + { + return std::make_unique(STDIN_FILENO); + } + else + { + String relative_path_from = disk.getRelativeFromRoot(path_from.value()); + return disk.getDisk()->readFile(relative_path_from); + } + }(); - auto out = disk->writeFile(relative_path); + auto out = disk.getDisk()->writeFile(path_to); copyData(*in, *out); out->finalize(); } }; + +CommandPtr makeCommandWrite() +{ + return std::make_shared(); } -std::unique_ptr makeCommandWrite() -{ - return std::make_unique(); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 5da5ab4bae9..02e8b74b889 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -1,85 +1,263 @@ #include "DisksApp.h" +#include +#include +#include +#include 
+#include "Common/Exception.h" +#include +#include +#include +#include +#include "DisksClient.h" #include "ICommand.h" +#include +#include +#include +#include + #include -#include #include - +#include namespace DB { -namespace ErrorCodes +CommandPtr DisksApp::getCommandByName(String command) const { - extern const int BAD_ARGUMENTS; + auto it = aliases.find(command); + if (it != aliases.end()) + { + command = it->second; + } + try + { + return command_descriptions.at(command); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command {} is unknown", command); + } } -size_t DisksApp::findCommandPos(std::vector & common_arguments) +std::vector DisksApp::getEmptyCompletion(CommandPtr command_) const { - for (size_t i = 0; i < common_arguments.size(); i++) - if (supported_commands.contains(common_arguments[i])) - return i + 1; - return common_arguments.size(); + auto answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command_->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(""); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } + }(); + for (const auto & disk_name : client->getAllDiskNames()) + { + answer.push_back(disk_name); + } + for (const auto & option : command_->options_description.options()) + { + answer.push_back("--" + option->long_name()); + } + std::sort(answer.begin(), answer.end()); + return answer; } -void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description) +std::vector DisksApp::getCompletions(const String & prefix) const { - std::optional help_description = - createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); + auto arguments = split(prefix, word_break_characters); + if (arguments.empty()) + { + return {}; + } + if (word_break_characters.contains(prefix.back())) + { + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {arguments.back()}; + } + return getEmptyCompletion(command); + } + else if (arguments.size() == 1) + { + String command_prefix = arguments[0]; + std::vector answer{}; + for (const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + return answer; + } + return {command_prefix}; + } + else + { + String last_token = arguments.back(); + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) 
+ { + return {last_token}; + } + auto answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); - help_description->add(command_option_description); - - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; - - for (const auto & current_command : supported_commands) - std::cout << command_descriptions[current_command]->command_name - << "\t" - << command_descriptions[current_command]->description - << "\n\n"; - - std::cout << command_option_description << '\n'; + for (const auto & disk_name : client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } + } + if (!answer.empty()) + { + return answer; + } + else + { + return {last_token}; + } + } } -String DisksApp::getDefaultConfigFileName() +bool DisksApp::processQueryText(String text) { - return "/etc/clickhouse-server/config.xml"; + if (exit_strings.find(text) != exit_strings.end()) + return false; + CommandPtr command; + try + { + auto arguments = split(text, word_break_characters); + command = getCommandByName(arguments[0]); + arguments.erase(arguments.begin()); + command->execute(arguments, *client); + } + catch (DB::Exception & err) + { + int code = getCurrentExceptionCode(); + if (code == ErrorCodes::LOGICAL_ERROR) + { + throw std::move(err); + } + else if (code == ErrorCodes::BAD_ARGUMENTS) + { + std::cerr << err.message() << "\n" + << "\n"; + if (command.get()) + { + std::cerr << "COMMAND: " << command->command_name << "\n"; + std::cerr << command->options_description << "\n"; + } + } + else + { + std::cerr << err.message() << "\n"; + } + } + catch (std::exception & err) + { + std::cerr << err.what() << "\n"; + } + + return true; } -void DisksApp::addOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description -) +void DisksApp::runInteractiveReplxx() { - options_description_.add_options() - ("help,h", "Print common help message") - ("config-file,C", po::value(), "Set config file") - ("disk", po::value(), "Set disk name") - ("command_name", po::value(), "Name for command to do") - ("save-logs", "Save logs to a file") - ("log-level", po::value(), "Logging level") - ; + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters.c_str(), + /* highlighter_= */ {}); + lr.enableBracketedPaste(); - positional_options_description.add("command_name", 1); + while (true) + { + String prompt = client->getCurrentDiskWithPath().getPrompt(); - supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"}; -#ifdef CLICKHOUSE_CLOUD - supported_commands.insert("packed-io"); -#endif + auto input = lr.readLine(prompt, ":-] "); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +void DisksApp::parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options) +{ + auto parser = 
po::command_line_parser(arguments).options(options_description).allow_unregistered(); + po::parsed_options parsed = parser.run(); + po::store(parsed, options); +} + +void DisksApp::addOptions() +{ + options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( + "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( + "log-level", po::value(), "Logging level"); command_descriptions.emplace("list-disks", makeCommandListDisks()); + command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("list", makeCommandList()); + command_descriptions.emplace("cd", makeCommandChangeDirectory()); command_descriptions.emplace("move", makeCommandMove()); command_descriptions.emplace("remove", makeCommandRemove()); command_descriptions.emplace("link", makeCommandLink()); - command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("write", makeCommandWrite()); command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); + command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); #ifdef CLICKHOUSE_CLOUD - command_descriptions.emplace("packed-io", makeCommandPackedIO()); + // command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif } @@ -95,29 +273,66 @@ void DisksApp::processOptions() config().setString("log-level", options["log-level"].as()); } -DisksApp::~DisksApp() +void DisksApp::printHelpMessage(const ProgramOptionsDescription &) { - if (global_context) - global_context->shutdown(); + std::optional help_description + = createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); + + help_description->add(options_description); + + std::cout << "ClickHouse disk management tool\n"; + std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; + std::cout << "clickhouse-disks\n\n"; + + for (const auto & [current_command, _] : command_descriptions) + { + std::cout << command_descriptions[current_command]->command_name; + bool was = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == current_command) + { + if (was) + std::cout << ","; + else + std::cout << "("; + std::cout << alias_name; + was = true; + } + } + std::cout << (was ? 
")" : "") << " \t" << command_descriptions[current_command]->description << "\n\n"; + } } -void DisksApp::init(std::vector & common_arguments) +void DisksApp::initializeHistoryFile() { - stopOptionsProcessing(); + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.disks-file-history"; - ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())}; + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } +} - po::positional_options_description positional_options_description; - - addOptions(options_description, positional_options_description); - - size_t command_pos = findCommandPos(common_arguments); - std::vector global_flags(command_pos); - command_arguments.resize(common_arguments.size() - command_pos); - copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin()); - copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin()); - - parseAndCheckOptions(options_description, positional_options_description, global_flags); +void DisksApp::init(const std::vector & common_arguments) +{ + addOptions(); + parseAndCheckOptions(common_arguments, options_description, options); po::notify(options); @@ -127,42 +342,18 @@ void DisksApp::init(std::vector & common_arguments) exit(0); // NOLINT(concurrency-mt-unsafe) } - if (!supported_commands.contains(command_name)) - { - std::cerr << "Unknown command name: " << command_name << "\n"; - printHelpMessage(options_description); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - processOptions(); } -void DisksApp::parseAndCheckOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments) +String DisksApp::getDefaultConfigFileName() { - auto parser = po::command_line_parser(arguments) - .options(options_description_) - .positional(positional_options_description) - .allow_unregistered(); - - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - - auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - for (const auto & arg : positional_arguments) - { - if (command_descriptions.contains(arg)) - { - command_name = arg; - break; - } - } + return "/etc/clickhouse-server/config.xml"; } int DisksApp::main(const std::vector & /*args*/) { + std::vector keys; + config().keys(keys); if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -176,6 +367,9 @@ int DisksApp::main(const std::vector & /*args*/) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } + config().keys(keys); + initializeHistoryFile(); + if (config().has("save-logs")) { auto log_level = config().getString("log-level", "trace"); @@ -193,7 +387,7 @@ int DisksApp::main(const std::vector & /*args*/) registerDisks(/* global_skip_access_check= */ true); registerFormats(); - shared_context = Context::createShared(); + auto shared_context = Context::createShared(); global_context = 
Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); @@ -202,59 +396,37 @@ int DisksApp::main(const std::vector & /*args*/) String path = config().getString("path", DBMS_DEFAULT_PATH); global_context->setPath(path); - auto & command = command_descriptions[command_name]; + String main_disk = config().getString("disk", "default"); - auto command_options = command->getCommandOptions(); - std::vector args; - if (command_options) - { - auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered(); - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - po::notify(options); - - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - command->processOptions(config(), options); - } - else - { - auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered(); - po::parsed_options parsed = parser.run(); - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - } - - std::unordered_set disks - { - config().getString("disk", "default"), - config().getString("disk-from", config().getString("disk", "default")), - config().getString("disk-to", config().getString("disk", "default")), - }; - - auto validator = [&disks]( - const Poco::Util::AbstractConfiguration & config, - const std::string & disk_config_prefix, - const std::string & disk_name) - { - if (!disks.contains(disk_name)) - return false; - - const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); - - if (disk_type == "cache") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); - - return true; - }; + auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; constexpr auto config_prefix = "storage_configuration.disks"; auto disk_selector = std::make_shared(); disk_selector->initialize(config(), config_prefix, global_context, validator); - command->execute(args, disk_selector, config()); + std::vector>> disks_with_path; + + for (const auto & [_, disk_ptr] : disk_selector->getDisksMap()) + { + disks_with_path.emplace_back( + disk_ptr, (disk_ptr->getName() == "local") ? std::optional{fs::current_path().string()} : std::nullopt); + } + + + client = std::make_unique(std::move(disks_with_path), main_disk); + + suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + runInteractiveReplxx(); return Application::EXIT_OK; } +DisksApp::~DisksApp() +{ + if (global_context) + global_context->shutdown(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) @@ -269,16 +441,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv) catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; - return 1; + return 0; } catch (const boost::program_options::error & e) { std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + return 0; } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + return 0; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 51bc3f58dc4..a0ce98b51d0 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -1,61 +1,86 @@ #pragma once +#include +#include +#include #include +#include "DisksClient.h" +#include "ICommand_fwd.h" #include +#include +#include #include -#include - namespace DB { -class ICommand; -using CommandPtr = std::unique_ptr; - -namespace po = boost::program_options; using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; -class DisksApp : public Poco::Util::Application, public Loggers +class DisksApp : public Poco::Util::Application { public: - DisksApp() = default; - ~DisksApp() override; + void addOptions(); - void init(std::vector & common_arguments); - - int main(const std::vector & args) override; - -protected: - static String getDefaultConfigFileName(); - - void addOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description); void processOptions(); - void printHelpMessage(ProgramOptionsDescription & command_option_description); + bool processQueryText(String text); - size_t findCommandPos(std::vector & common_arguments); + void init(const std::vector & common_arguments); + + int main(const std::vector & /*args*/) override; + + CommandPtr getCommandByName(String command) const; + + void initializeHistoryFile(); + + static void parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); + + void printHelpMessage(const ProgramOptionsDescription &); + + std::vector getCompletions(const String & prefix) const; + + std::vector getEmptyCompletion(CommandPtr command_) const; + + ~DisksApp() override; private: - void parseAndCheckOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments); + void runInteractiveReplxx(); -protected: + String getDefaultConfigFileName(); + + // Fields responsible for the REPL work + String history_file; + LineReader::Suggest suggest; + LineReader::Patterns query_extenders = {"\\"}; + LineReader::Patterns query_delimiters = {}; + String word_break_characters{" \t\v\f\a\b\r\n"}; + + // General commang line arguments parsing fields ContextMutablePtr global_context; - SharedContextHolder shared_context; - - String command_name; - std::vector command_arguments; - - std::unordered_set supported_commands; + ProgramOptionsDescription options_description; + CommandLineOptions options; std::unordered_map command_descriptions; - po::variables_map options; + const std::unordered_map aliases + = {{"cp", "copy"}, + {"mv", "move"}, + {"ls", "list"}, + {"list_disks", "list-disks"}, + {"ln", "link"}, + {"rm", "remove"}, + {"r", "read"}, + {"w", "write"}, + {"delete", "remove"}, + {"ls-disks", "list-disks"}, + {"ls_disks", "list-disks"}, + {"packed_io", "packed-io"}}; + + std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; + + std::unique_ptr client{}; }; } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp new file mode 100644 index 00000000000..3c258b5aa6e --- /dev/null +++ b/programs/disks/DisksClient.cpp @@ -0,0 +1,42 @@ +#include "DisksClient.h" +#include +#include +#include 
+#include +#include +#include +#include +#include +#include "ICommand.h" + +#include +#include + +#include + +#include +#include + +namespace DB +{ +std::vector split(const String & text, const String & delimiters) +{ + std::vector arguments; + auto prev = text.begin(); + auto pos = std::find_if(text.begin(), text.end(), [&](char x) { return delimiters.contains(x); }); + while (pos != text.end()) + { + if (pos > prev) + { + arguments.push_back({prev, pos}); + } + prev = ++pos; + pos = std::find_if(prev, text.end(), [&](char x) { return delimiters.contains(x); }); + } + if (pos > prev) + { + arguments.push_back({prev, text.end()}); + } + return arguments; + } +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h new file mode 100644 index 00000000000..e3b8cf7c8a9 --- /dev/null +++ b/programs/disks/DisksClient.h @@ -0,0 +1,316 @@ +#pragma once + +#include <__tuple> +#include +#include +#include +#include +#include +#include +#include +#include "Disks/DiskSelector.h" +#include "Disks/IDisk.h" +#include "ICommand_fwd.h" +#include "IO/ReadHelpers.h" + +#include +#include +#include +#include "Common/Exception.h" + +// #include +namespace fs = std::filesystem; + +namespace DB +{ + + +std::vector split(const String & text, const String & delimiters); + +using ProgramOptionsDescription = boost::program_options::options_description; +using CommandLineOptions = boost::program_options::variables_map; + + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +}; + +class DiskWithPath +{ +public: + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt) + : disk(disk_) + , path( + [&]() + { + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + return path_.value(); + } + else + { + return String{"/"}; + } + }()) + { + if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} at disk {} is not a directory", path, disk->getName()); + } + } + + String getPrompt() { return disk->getName() + ":" + path + "$ "; } + + String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } + + String getCurrentPath() const { return path; } + + bool isDirectory(const String & any_path) const { return disk->isDirectory(getRelativeFromRoot(any_path)); } + + std::vector listAllFilesByPath(const String & any_path) const + { + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } + } + + std::vector getAllFilesByPattern(std::string pattern) const + { + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; + } + } + + DiskPtr getDisk() const { return disk; } + 
+ void setPath(const String & any_path) + { + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } + } + + String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } + +private: + static String validatePathAndGetAsRelative(const String & path) + { + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. + if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; + } + + static std::string normalizePathAndGetAsRelative(const std::string & messyPath) + { + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + std::string npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); + } + + static std::string normalizePath(const std::string & messyPath) + { + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); + } + + const DiskPtr disk; + String path; +}; + +class DisksClient +{ +public: + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) + { + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = true; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); + } + + const DiskWithPath & getDiskWithPath(const String & disk) const + { + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } + } + + DiskWithPath & getDiskWithPath(const String & disk) + { + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } + } + + const DiskWithPath & getCurrentDiskWithPath() const + { + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } + } + + DiskWithPath & getCurrentDiskWithPath() + { + try + { + return disks.at(current_disk); + } + catch (...) 
+ { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } + } + + DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } + + DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } + + bool switchToDisk(const String & disk_, const std::optional & path_) + { + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + return true; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } + } + + std::vector getAllDiskNames() const + { + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; + } + + std::vector getAllFilesByPatternFromAllDisks(std::string pattern) const + { + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; + } + +private: + void addDisk(DiskPtr disk_, const std::optional & path_) + { + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); + } + + String current_disk; + std::unordered_map disks; +}; +} diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 86188fb6db1..41610f1086f 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,6 @@ #include "ICommand.h" #include +#include "DisksClient.h" namespace DB @@ -7,46 +8,45 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; +extern const int BAD_ARGUMENTS; } -void ICommand::printHelpMessage() const +CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) { - std::cout << "Command: " << command_name << '\n'; - std::cout << "Description: " << description << '\n'; - std::cout << "Usage: " << usage << '\n'; + CommandLineOptions options; + auto parser = po::command_line_parser(commands); + parser.options(options_description).positional(positional_options_description).allow_unregistered(); - if (command_option_description) + po::parsed_options parsed = parser.run(); + po::store(parsed, options); + + return options; +} + +void ICommand::execute(const Strings & commands, DisksClient & client) +{ + try { - auto options = *command_option_description; - if (!options.options().empty()) - std::cout << options << '\n'; + processCommandLineArguments(commands); + } + catch (std::exception & exc) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); + } + return executeImpl(processCommandLineArguments(commands), client); +} + +DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) +{ + auto disk_name = getValueFromCommandLineOptionsWithOptional(options, name); + if (disk_name.has_value()) + { + return client.getDiskWithPath(disk_name.value()); + } + else + { + return client.getCurrentDiskWithPath(); } } -void ICommand::addOptions(ProgramOptionsDescription & options_description) -{ - if (!command_option_description || command_option_description->options().empty()) - return; - - options_description.add(*command_option_description); -} - -String ICommand::validatePathAndGetAsRelative(const String & path) -{ - /// If path contain non-normalized symbols like . 
we will normalized them. If the resulting normalized path - /// still contain '..' it can be dangerous, disallow such paths. Also since clickhouse-disks - /// is not an interactive program (don't track you current path) it's OK to disallow .. paths. - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. - if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; -} - } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index efe350fe87b..bf10841b636 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,66 +1,133 @@ #pragma once -#include +#include #include +#include #include -#include +#include #include +#include "Common/Exception.h" +#include -#include +#include +#include + +#include "DisksApp.h" + +#include "DisksClient.h" + +#include "ICommand_fwd.h" namespace DB { +// namespace po = boost::program_options; namespace po = boost::program_options; -using ProgramOptionsDescription = boost::program_options::options_description; -using CommandLineOptions = boost::program_options::variables_map; +using ProgramOptionsDescription = po::options_description; +using PositionalProgramOptionsDescription = po::positional_options_description; +using CommandLineOptions = po::variables_map; + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} class ICommand { public: - ICommand() = default; + explicit ICommand() = default; virtual ~ICommand() = default; - virtual void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) = 0; + void execute(const Strings & commands, DisksClient & client); - const std::optional & getCommandOptions() const { return command_option_description; } + virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0; - void addOptions(ProgramOptionsDescription & options_description); + CommandLineOptions processCommandLineArguments(const Strings & commands); - virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0; + void exit() { options_parsed = false; } protected: - void printHelpMessage() const; + template + static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) + { + try + { + return options[name].as(); + } + catch (...) 
+ { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument {} has wrong type and can't be parsed", name); + } + } + + template + static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument {} is missing", name); + } + } + + template + static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + return default_value; + } + } + + template + static std::optional getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return std::optional{getValueFromCommandLineOptions(options, name)}; + } + else + { + return std::nullopt; + } + } + + DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); - static String validatePathAndGetAsRelative(const String & path); public: String command_name; String description; + ProgramOptionsDescription options_description; protected: - std::optional command_option_description; - String usage; - po::positional_options_description positional_options_description; + PositionalProgramOptionsDescription positional_options_description; + +private: + bool options_parsed{}; }; -using CommandPtr = std::unique_ptr; - -} - DB::CommandPtr makeCommandCopy(); -DB::CommandPtr makeCommandLink(); -DB::CommandPtr makeCommandList(); DB::CommandPtr makeCommandListDisks(); +DB::CommandPtr makeCommandList(); +DB::CommandPtr makeCommandChangeDirectory(); +DB::CommandPtr makeCommandLink(); DB::CommandPtr makeCommandMove(); DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); DB::CommandPtr makeCommandPackedIO(); +DB::CommandPtr makeCommandSwitchDisk(); + +} diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h new file mode 100644 index 00000000000..f45b6c8d17c --- /dev/null +++ b/programs/disks/ICommand_fwd.h @@ -0,0 +1,10 @@ +#include +#include + + +namespace DB +{ +class ICommand; + +using CommandPtr = std::shared_ptr; +} diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index a9260a249dd..6f10f5f701e 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { @@ -27,15 +26,22 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) +void DiskSelector::initialize( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); + std::cerr << "Config Prefix: " << config_prefix << std::endl; + for (auto & key : keys) + std::cerr << "Key inside disk selector initialize: " << key; + std::cerr << std::endl; auto & factory = DiskFactory::instance(); constexpr auto default_disk_name = "default"; bool has_default_disk = false; + constexpr auto local_disk_name = "local"; + bool has_local_disk = false; for (const auto & disk_name : keys) { if (!std::all_of(disk_name.begin(), 
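For reference, the option-handling pattern introduced above (processCommandLineArguments plus the getValueFromCommandLineOptions* helpers) is a standard boost::program_options flow with positional arguments. Below is a minimal, self-contained sketch of that flow, not code from the patch; the option name "path" and the std::string template arguments are illustrative assumptions (explicit template arguments such as <String> do not appear in this copy of the patch text).

    #include <boost/program_options.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    namespace po = boost::program_options;

    int main()
    {
        // Declare one named option and register it as positional,
        // the same way the disks commands declare "path", "path-from", etc.
        po::options_description options_description("options");
        options_description.add_options()("path", po::value<std::string>(), "path to operate on");

        po::positional_options_description positional_options_description;
        positional_options_description.add("path", 1);

        // Tokens as they would arrive from the interactive loop, e.g. after "list /data".
        std::vector<std::string> tokens{"/data"};

        po::variables_map options;
        po::store(po::command_line_parser(tokens)
                      .options(options_description)
                      .positional(positional_options_description)
                      .allow_unregistered()
                      .run(),
                  options);

        // Mirrors getValueFromCommandLineOptionsThrow: a missing mandatory option is an error.
        if (!options.count("path"))
        {
            std::cerr << "mandatory argument 'path' is missing\n";
            return 1;
        }
        std::cout << options["path"].as<std::string>() << "\n";
    }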
disk_name.end(), isWordCharASCII)) @@ -44,6 +50,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (disk_name == default_disk_name) has_default_disk = true; + if (disk_name == local_disk_name) + has_local_disk = true; + const auto disk_config_prefix = config_prefix + "." + disk_name; if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) @@ -54,11 +63,12 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (!has_default_disk) { disks.emplace( - default_disk_name, - std::make_shared( - default_disk_name, context->getPath(), 0, context, config, config_prefix)); + default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } + if (!has_local_disk) + disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); + is_initialized = true; } From bc21d0fb27bdfff61f7c29a2682d836e336394a5 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 11:49:59 +0000 Subject: [PATCH 079/439] Minor fix --- programs/disks/CommandList.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 26a576abc7d..16d249e299d 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -75,12 +75,11 @@ private: } std::cout << "\n"; - for (const auto & file_name : file_names) + for (const auto & file_name : selected_and_sorted_file_names) { auto path = absolute_path + "/" + file_name; if (disk.isDirectory(path)) - if (show_hidden || (!file_name.starts_with('.'))) - listRecursive(disk, path, show_hidden); + listRecursive(disk, path, show_hidden); } } }; From 90365ef0a6b7408a7ca62830dcb50bdf011a7584 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 11:57:20 +0000 Subject: [PATCH 080/439] Remove garbage logs --- src/Disks/DiskSelector.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 6f10f5f701e..77e2299ed65 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -31,10 +31,6 @@ void DiskSelector::initialize( { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); - std::cerr << "Config Prefix: " << config_prefix << std::endl; - for (auto & key : keys) - std::cerr << "Key inside disk selector initialize: " << key; - std::cerr << std::endl; auto & factory = DiskFactory::instance(); From 846d46075d1c42666be7b090828cc7bca64f64c9 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 12:17:04 +0000 Subject: [PATCH 081/439] Remove comments --- programs/disks/CommandChangeDirectory.cpp | 7 ------- programs/disks/CommandCopy.cpp | 5 ----- programs/disks/CommandLink.cpp | 5 ----- programs/disks/CommandList.cpp | 5 ----- programs/disks/CommandListDisks.cpp | 5 ----- programs/disks/CommandMkDir.cpp | 5 ----- programs/disks/CommandMove.cpp | 5 ----- programs/disks/CommandRead.cpp | 6 ------ programs/disks/CommandRemove.cpp | 5 ----- programs/disks/CommandSwitchDisk.cpp | 7 ------- programs/disks/CommandWrite.cpp | 6 ------ programs/disks/DisksApp.cpp | 2 +- programs/disks/DisksApp.h | 6 ++++++ programs/disks/DisksClient.h | 2 +- 14 files changed, 8 insertions(+), 63 deletions(-) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 9932d918099..3baf69f8be0 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ 
b/programs/disks/CommandChangeDirectory.cpp @@ -8,11 +8,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandChangeDirectory final : public ICommand { public: @@ -28,9 +23,7 @@ public: void executeImpl(const CommandLineOptions & options, DisksClient & client) override { DiskWithPath & disk = getDiskWithPath(client, options, "disk"); - // std::cerr << "Disk name: " << disk.getDisk()->getName() << std::endl; String path = getValueFromCommandLineOptionsThrow(options, "path"); - // std::cerr << "Disk path: " << path << std::endl; disk.setPath(path); } }; diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index e853e054f97..ae749f7448a 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -6,11 +6,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandCopy final : public ICommand { public: diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 8b467891d18..7e80faf9fc6 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandLink final : public ICommand { public: diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 16d249e299d..f91f0c6455c 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -7,11 +7,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandList final : public ICommand { public: diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 16779b0fdae..9fb67fed5e0 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -7,11 +7,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandListDisks final : public ICommand { public: diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 23312435d4e..895602adf72 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -6,11 +6,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandMkDir final : public ICommand { public: diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 25620de448e..fb2fce2fa61 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandMove final : public ICommand { public: diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 82ff90b6e02..6963824b5cc 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -8,18 +8,12 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandRead final : public ICommand { public: CommandRead() { command_name = "read"; - // command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Read a file from `FROM_PATH` to `TO_PATH`"; options_description.add_options()( "path-from", po::value(), "file from which we are reading, defaults to `stdin` (mandatory, positional)")( diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 0344a09d156..f332267c780 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -4,11 +4,6 @@ namespace DB 
{ -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandRemove final : public ICommand { public: diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index 6c1fbaa0623..e59a1fc8e87 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -8,20 +8,13 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandSwitchDisk final : public ICommand { public: explicit CommandSwitchDisk() : ICommand() { command_name = "switch-disk"; - // options_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Change disk"; - // options_description->add_options()("recursive", "recursively list all directories"); options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( "path", po::value(), "the path to switch on the disk"); positional_options_description.add("disk", 1); diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 42999572443..e8b3a0741ba 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -9,18 +9,12 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -} - class CommandWrite final : public ICommand { public: CommandWrite() { command_name = "write"; - // command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "Write a file from `FROM_PATH` to `TO_PATH`"; options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin`")( "path-to", po::value(), "file to which we are writing (mandatory, positional)"); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 02e8b74b889..10eb3f986b9 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -257,7 +257,7 @@ void DisksApp::addOptions() command_descriptions.emplace("mkdir", makeCommandMkDir()); command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); #ifdef CLICKHOUSE_CLOUD - // command_descriptions.emplace("packed-io", makeCommandPackedIO()); + command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index a0ce98b51d0..7c9150cd1ce 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -15,6 +15,12 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +}; + using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index e3b8cf7c8a9..89d5ecce666 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -24,7 +24,6 @@ namespace fs = std::filesystem; namespace DB { - std::vector split(const String & text, const String & delimiters); using ProgramOptionsDescription = boost::program_options::options_description; @@ -34,6 +33,7 @@ using CommandLineOptions = boost::program_options::variables_map; namespace ErrorCodes { extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; }; class DiskWithPath From 4574ee75041ad945b276cf9cb5f3aa9f635e73a0 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 12:45:05 +0000 Subject: [PATCH 082/439] Correct style check --- programs/disks/CommandSwitchDisk.cpp | 5 +++++ programs/disks/DisksApp.cpp | 7 +++++++ 
programs/disks/DisksApp.h | 6 ------ programs/disks/ICommand_fwd.h | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index e59a1fc8e87..0eb7ced7abf 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +}; + class CommandSwitchDisk final : public ICommand { public: diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 10eb3f986b9..dd3c60c7630 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -24,6 +24,13 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +}; + + CommandPtr DisksApp::getCommandByName(String command) const { auto it = aliases.find(command); diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 7c9150cd1ce..a0ce98b51d0 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -15,12 +15,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; -}; - using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h index f45b6c8d17c..84310b4a18d 100644 --- a/programs/disks/ICommand_fwd.h +++ b/programs/disks/ICommand_fwd.h @@ -1,6 +1,6 @@ -#include -#include +#pragma once +#include namespace DB { From 4c2d8a1378f775b94eed7fdde47df8a03407288f Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 27 May 2024 14:26:20 +0000 Subject: [PATCH 083/439] Add highlighting --- programs/disks/CommandChangeDirectory.cpp | 1 - programs/disks/DisksApp.cpp | 6 ++++-- programs/disks/DisksClient.cpp | 4 ---- programs/disks/DisksClient.h | 6 ------ programs/disks/ICommand.cpp | 1 - programs/disks/ICommand.h | 6 +++--- 6 files changed, 7 insertions(+), 17 deletions(-) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 3baf69f8be0..ce79d43db30 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -1,5 +1,4 @@ #include -#include "Common/Exception.h" #include #include "DisksApp.h" #include "DisksClient.h" diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index dd3c60c7630..a990d85a9d1 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -227,9 +227,11 @@ void DisksApp::runInteractiveReplxx() while (true) { - String prompt = client->getCurrentDiskWithPath().getPrompt(); + DiskWithPath disk_with_path = client->getCurrentDiskWithPath(); + String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath() + + "\x1b[0m$ "; - auto input = lr.readLine(prompt, ":-] "); + auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m"); if (input.empty()) break; diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 3c258b5aa6e..8e90f0a82c1 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -7,10 +7,6 @@ #include #include #include -#include "ICommand.h" - -#include -#include #include diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index 89d5ecce666..cc9b1015bad 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -1,6 +1,5 @@ 
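The highlighted prompt added to the interactive loop above is built from plain ANSI SGR escape sequences around the disk name and current path ("\x1b[1;34m" for bold blue, "\x1b[1;31m" for bold red, "\x1b[0m" to reset). A small stand-alone sketch of the same idea; the function name and sample values are illustrative, not taken from the patch:

    #include <iostream>
    #include <string>

    // Compose a colored "disk:path$ " prompt from ANSI SGR codes,
    // mirroring the concatenation used in DisksApp::runInteractiveReplxx.
    std::string makePrompt(const std::string & disk, const std::string & path)
    {
        return "\x1b[1;34m" + disk + "\x1b[0m:" + "\x1b[1;31m" + path + "\x1b[0m$ ";
    }

    int main()
    {
        std::cout << makePrompt("local", "/data") << "\n";
    }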
#pragma once -#include <__tuple> #include #include #include @@ -8,10 +7,7 @@ #include #include #include -#include "Disks/DiskSelector.h" #include "Disks/IDisk.h" -#include "ICommand_fwd.h" -#include "IO/ReadHelpers.h" #include #include @@ -64,8 +60,6 @@ public: } } - String getPrompt() { return disk->getName() + ":" + path + "$ "; } - String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } String getCurrentPath() const { return path; } diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 41610f1086f..7a70a61bf6c 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,4 @@ #include "ICommand.h" -#include #include "DisksClient.h" diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index bf10841b636..480b42d4f7a 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -23,7 +23,6 @@ namespace DB { -// namespace po = boost::program_options; namespace po = boost::program_options; using ProgramOptionsDescription = po::options_description; using PositionalProgramOptionsDescription = po::positional_options_description; @@ -127,7 +126,8 @@ DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); -DB::CommandPtr makeCommandPackedIO(); DB::CommandPtr makeCommandSwitchDisk(); - +#ifdef CLICKHOUSE_CLOUD +DB::CommandPtr makeCommandPackedIO(); +#endif } From a9000bd82173b2a57290ec75fee4bb80b51f9fd1 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 28 May 2024 13:17:49 +0000 Subject: [PATCH 084/439] Fix bugs --- programs/disks/CMakeLists.txt | 16 ++++++++-------- programs/disks/CommandChangeDirectory.cpp | 2 +- programs/disks/CommandLink.cpp | 4 ++-- programs/disks/CommandMove.cpp | 4 ++-- programs/disks/CommandRead.cpp | 20 +++++++++----------- programs/disks/CommandSwitchDisk.cpp | 6 +----- programs/disks/CommandWrite.cpp | 5 +---- programs/disks/DisksClient.h | 3 +-- programs/disks/ICommand.h | 4 ++-- 9 files changed, 27 insertions(+), 37 deletions(-) diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index c5b30d61706..0f3cb601750 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,18 +1,18 @@ set (CLICKHOUSE_DISKS_SOURCES - ICommand.cpp - DisksClient.cpp DisksApp.cpp - CommandCopy.cpp - CommandListDisks.cpp - CommandList.cpp - CommandLink.cpp + DisksClient.cpp + ICommand.cpp CommandChangeDirectory.cpp + CommandCopy.cpp + CommandLink.cpp + CommandList.cpp + CommandListDisks.cpp CommandMkDir.cpp CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp - CommandSwitchDisk.cpp) + CommandSwitchDisk.cpp + CommandWrite.cpp) if (CLICKHOUSE_CLOUD) set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index ce79d43db30..5e6a08cd3fd 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -14,7 +14,7 @@ public: { command_name = "cd"; description = "Change directory"; - options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)")( + options_description.add_options()("path", po::value(), "the path we want to get to (mandatory, positional)")( "disk", po::value(), "A disk where the path is changed"); positional_options_description.add("path", 1); } diff --git a/programs/disks/CommandLink.cpp 
b/programs/disks/CommandLink.cpp index 7e80faf9fc6..74707160f67 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -12,8 +12,8 @@ public: command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; options_description.add_options()( - "path-to", po::value(), "the path from which a hard link will be created (mandatory, positional)")( - "path-from", po::value(), "the path where a hard link will be created (mandatory, positional)"); + "path-from", po::value(), "the path where a hard link will be created (mandatory, positional)")( + "path-to", po::value(), "the path from which a hard link will be created (mandatory, positional)"); positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index fb2fce2fa61..23144df3d35 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -25,9 +25,9 @@ public: String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); if (disk.getDisk()->isFile(path_from)) - disk.getDisk()->moveFile(path_from, path_from); + disk.getDisk()->moveFile(path_from, path_to); else - disk.getDisk()->moveDirectory(path_from, path_from); + disk.getDisk()->moveDirectory(path_from, path_to); } }; diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 6963824b5cc..5c7daa14bf4 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -2,6 +2,7 @@ #include #include #include +#include "Common/Exception.h" #include #include "ICommand.h" @@ -15,9 +16,8 @@ public: { command_name = "read"; description = "Read a file from `FROM_PATH` to `TO_PATH`"; - options_description.add_options()( - "path-from", po::value(), "file from which we are reading, defaults to `stdin` (mandatory, positional)")( - "path-to", po::value(), "file to which we are writing"); + options_description.add_options()("path-from", po::value(), "file from which we are reading (mandatory, positional)")( + "path-to", po::value(), "file to which we are writing, , defaults to `stdout`"); positional_options_description.add("path-from", 1); } @@ -25,26 +25,24 @@ public: { auto disk = client.getCurrentDiskWithPath(); String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + std::cerr << path_from << std::endl; std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); - if (path_to.has_value()) - { - path_to = std::optional{disk.getRelativeFromRoot(path_to.value())}; - } auto in = disk.getDisk()->readFile(path_from); + std::unique_ptr out = {}; if (path_to.has_value()) { String relative_path_to = disk.getRelativeFromRoot(path_to.value()); - - auto out = disk.getDisk()->writeFile(relative_path_to); + out = disk.getDisk()->writeFile(relative_path_to); copyData(*in, *out); - out->finalize(); } else { - std::unique_ptr out = std::make_unique(STDOUT_FILENO); + out = std::make_unique(STDOUT_FILENO); copyData(*in, *out); + out->write('\n'); } + out->finalize(); } }; diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index 0eb7ced7abf..285afe0b7ce 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -30,11 +30,7 @@ public: String disk = getValueFromCommandLineOptions(options, "disk"); std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); - if 
(!client.switchToDisk(disk, path)) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Unable to switch to disk: {}, path: {}", disk, path.has_value() ? path.value() : "NO PATH"); - } + client.switchToDisk(disk, path); } }; diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index e8b3a0741ba..433ebb3d5cf 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -21,15 +21,12 @@ public: positional_options_description.add("path-to", 1); } + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { auto disk = client.getCurrentDiskWithPath(); std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); - if (path_from.has_value()) - { - path_from = std::optional{disk.getRelativeFromRoot(path_from.value())}; - } String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index cc9b1015bad..0fc20125e21 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -252,7 +252,7 @@ public: DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } - bool switchToDisk(const String & disk_, const std::optional & path_) + void switchToDisk(const String & disk_, const std::optional & path_) { if (disks.contains(disk_)) { @@ -261,7 +261,6 @@ public: disks.at(disk_).setPath(path_.value()); } current_disk = disk_; - return true; } else { diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index 480b42d4f7a..1e05aefd28b 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -58,7 +58,7 @@ protected: } catch (...) { - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument {} has wrong type and can't be parsed", name); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); } } @@ -71,7 +71,7 @@ protected: } else { - throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument {} is missing", name); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name); } } From 856a0e35f25f8733334c4b5aa86fa2deb1da590a Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 28 May 2024 13:30:34 +0000 Subject: [PATCH 085/439] Remove redundant includes --- programs/disks/CommandRead.cpp | 1 - programs/disks/CommandSwitchDisk.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 5c7daa14bf4..ea05d25fb44 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -2,7 +2,6 @@ #include #include #include -#include "Common/Exception.h" #include #include "ICommand.h" diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index 285afe0b7ce..22d56673832 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -1,6 +1,5 @@ #include #include -#include "Common/Exception.h" #include #include "DisksApp.h" #include "ICommand.h" From ee3385fbc00151427a209398a881f882aef6512b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 28 May 2024 18:56:41 +0200 Subject: [PATCH 086/439] adjust after merge with master --- src/Storages/ObjectStorage/StorageObjectStorageSink.cpp | 4 ++-- src/Storages/ObjectStorage/StorageObjectStorageSink.h | 2 +- src/Storages/StorageAzureBlob.cpp | 2 +- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 4 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 0a3cf19a590..9718b329414 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk chunk) +void StorageObjectStorageSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..1ec52889f0a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 5dc407bf86d..a1ce991b5c9 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 7d0127bc1c4..a8d407e8d79 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -231,7 +231,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - self.rfile.read(50) + request_handler.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 1b7db4195c1f0e5be62ab7a3784deebc2481f666 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 02:18:50 +0200 Subject: [PATCH 087/439] work with tests --- src/Interpreters/InterpreterInsertQuery.cpp | 13 +- src/Interpreters/SquashingTransform.cpp | 8 +- src/Processors/ISimpleTransform.h | 2 - .../Transforms/CountingTransform.cpp | 3 +- .../Transforms/ExpressionTransform.cpp | 3 - .../Transforms/MaterializingTransform.cpp | 3 - .../Transforms/SquashingChunksTransform.cpp | 52 +- .../Transforms/buildPushingToViewsChain.cpp | 6 +- src/Storages/MergeTree/MergeTreeSink.cpp | 5 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 +- src/Storages/StorageAzureBlob.cpp | 1638 ------------ src/Storages/StorageS3.cpp | 2310 ----------------- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 + ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- 17 files changed, 82 insertions(+), 3996 deletions(-) delete mode 100644 src/Storages/StorageAzureBlob.cpp delete mode 100644 src/Storages/StorageS3.cpp diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index cd68cbc41c0..249c69b51b9 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -574,6 +574,8 @@ QueryPipeline 
InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); + size_t num_select_threads = pipeline.getNumThreads(); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -616,8 +618,18 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & /// Otherwise ResizeProcessor them down to 1 stream. size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + if (!settings.parallel_view_processing) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + if (table->isView() || !views.empty()) + sink_streams_size = 1; + } + auto [presink_chains, sink_chains] = buildPreAndSyncChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -636,7 +648,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.parallel_view_processing) { - size_t num_select_threads = pipeline.getNumThreads(); /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. if (pipeline.getNumThreads() > num_select_threads) pipeline.setMaxThreads(num_select_threads); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 8a902add9a5..30c801aaaff 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -82,9 +83,8 @@ void SquashingTransform::append(Block && input_block) { for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) { - const auto source_column = input_block.getByPosition(i).column; - - const auto acc_column = accumulated_block.getByPosition(i).column; + const auto source_column = std::move(input_block.getByPosition(i).column); + auto acc_column = std::move(accumulated_block.getByPosition(i).column); LOG_DEBUG(getLogger("SquashingTransform"), "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", @@ -93,7 +93,7 @@ void SquashingTransform::append(Block && input_block) source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + auto mutable_column = IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); } diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 3862ea76dbb..a47e0e49121 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -31,8 +31,6 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { - LOG_DEBUG(getLogger("ISimpleTransform"), - "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 7329a196f8a..c138eed69de 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "IO/Progress.h" namespace ProfileEvents @@ -18,7 +19,7 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { LOG_DEBUG(getLogger("CountingTransform"), - "onConsume 
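The stream sizing added above can be restated compactly: pre-sink (squashing) streams scale with max_insert_threads, while sink streams fall back to a single stream when the storage does not support parallel inserts, or when parallel_view_processing is off and the target table is a view or has dependent views. The following is an illustrative restatement with an invented helper name, not code from the patch:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Sink stream count as described by the hunk above (illustrative only).
    size_t pickSinkStreams(bool supports_parallel_insert, size_t max_insert_threads,
                           bool parallel_view_processing, bool is_view_or_has_dependent_views)
    {
        size_t sink_streams = supports_parallel_insert ? std::max<size_t>(1, max_insert_threads) : 1;
        if (!parallel_view_processing && is_view_or_has_dependent_views)
            sink_streams = 1;
        return sink_streams;
    }

    int main()
    {
        std::cout << pickSinkStreams(true, 8, false, true) << "\n"; // 1: dependent views force a single sink
        std::cout << pickSinkStreams(true, 8, true, true) << "\n";  // 8: parallel view processing keeps them
    }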
{}", chunk.getNumRows()); + "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index db5d2b0c49c..73d41828bc0 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -21,9 +21,6 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("ExpressionTransform"), - "transform {}", chunk.getNumRows()); - size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 8366472f876..4a7f5187c75 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -12,9 +12,6 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("MaterializingTransform"), - "transform {}", chunk.getNumRows()); - auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 22171d97b6e..2ee13c05b95 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -20,20 +20,32 @@ void SquashingChunksTransform::onConsume(Chunk chunk) LOG_DEBUG(getLogger("SquashingChunksTransform"), "onConsume {}", chunk.getNumRows()); - if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos().clone(); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); + if (result.block) { cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); - cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); - cur_chunkinfos = {}; - } + if (result.input_block_delayed) + { + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = std::move(chunk.getChunkInfos()); + } + else + { + cur_chunk.setChunkInfos(chunk.getChunkInfos()); + cur_chunkinfos = {}; + } - if (cur_chunkinfos.empty() && result.input_block_delayed) + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + } + else { - cur_chunkinfos = chunk.getChunkInfos().clone(); + assert(!result.input_block_delayed); + cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -85,15 +97,29 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); - if (result.input_block_delayed) + if (result.block) { - squashed_chunk.setChunkInfos(std::move(squashed_info)); - squashed_info = std::move(chunk.getChunkInfos()); + squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); + if (result.input_block_delayed) + { + 
squashed_chunk.setChunkInfos(std::move(squashed_info)); + squashed_info = std::move(chunk.getChunkInfos()); + } + else + { + squashed_chunk.setChunkInfos(chunk.getChunkInfos()); + squashed_info = {}; + } + + LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - squashed_chunk.setChunkInfos(std::move(chunk.getChunkInfos())); + assert(!result.input_block_delayed); + squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 72897a06c44..d44796610ed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "Core/Field.h" #include #include @@ -223,6 +224,7 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); + insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -731,8 +733,8 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index c252d95a5e9..0953cdc5d72 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -100,8 +100,9 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); + "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 11c64c97cb7..62d30764ca8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,9 +304,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); - + "TokenInfo has to be initialized with user token for table: {} user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git 
a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index a1ce991b5c9..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1638 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - "storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = 
{configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().exists(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = 
{configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, 
managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - -bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - 
context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context); - else - columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; - - /// We don't allow special columns in File storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) 
- { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, AzureObjectStorage * object_storage, const String & path, size_t sequence_number) - { - if (context->getSettingsRef().azure_truncate_on_insert || !object_storage->exists(StoredObject(path))) - return std::nullopt; - - if (context->getSettingsRef().azure_create_new_file_on_insert) - { - auto pos = path.find_first_of('.'); - String new_path; - do - { - new_path = path.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : path.substr(pos)); - ++sequence_number; - } - while (object_storage->exists(StoredObject(new_path))); - - return new_path; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object with key {} already exists. " - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - path); - } -} - -class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(getContext(), object_storage, partition_key, 1)) - partition_key = *new_path; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromAzureBlob( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & 
storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - 
storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - if (configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - auto path = configuration.blobs_paths.front(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(path, configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } - else - { - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(local_context, object_storage.get(), path, configuration.blobs_paths.size())) - { - configuration.blobs_paths.push_back(*new_path); - path = *new_path; - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
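Whether the glob iterator has to list the container at all is decided purely by the presence of glob metacharacters in the path, as the comment above notes: a path without them is already a concrete blob name. A standalone sketch of that check (the helper names isPlainKey and listingPrefix are illustrative, not part of the storage code):

#include <cassert>
#include <string>

/// A path is a plain key when it contains none of the glob metacharacters.
static bool isPlainKey(const std::string & path)
{
    return path.find_first_of("*?{") == std::string::npos;
}

/// Otherwise only the prefix before the first metacharacter can be used
/// to narrow down the listing request.
static std::string listingPrefix(const std::string & path)
{
    return path.substr(0, path.find_first_of("*?{"));
}

int main()
{
    assert(isPlainKey("data/2024/part.parquet"));
    assert(!isPlainKey("data/2024/*.parquet"));
    assert(listingPrefix("data/2024/*.parquet") == "data/2024/");
}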
- if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = 
object_storage->getObjectMetadata(key); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , 
file_iterator(file_iterator_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
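The transform added below exists because an input format may produce more than the query asked for; only the requested columns and subcolumns survive. A minimal stand-alone illustration of that projection, using plain std::map/std::vector stand-ins rather than the real Block and transform classes:

#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using Column = std::vector<int>;                 /// stand-in for a real column
using Block = std::map<std::string, Column>;     /// stand-in for a data block

/// Keep only the requested columns (including subcolumn names such as "arr.size0");
/// anything else produced by the input format is dropped.
Block extractRequestedColumns(const Block & input, const std::vector<std::string> & requested)
{
    Block result;
    for (const auto & name : requested)
    {
        auto it = input.find(name);
        if (it == input.end())
            throw std::runtime_error("Missing column: " + name);
        result.emplace(name, it->second);
    }
    return result;
}

int main()
{
    Block block = {{"id", {1, 2}}, {"arr.size0", {3, 0}}, {"unused", {9, 9}}};
    Block projected = extractRequestedColumns(block, {"id", "arr.size0"});
    (void)projected;
}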
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - std::optional format_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key : read_keys) - { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) - { - format = format_from_path; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in AzureBlobStorage. 
You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually"); - } - - return {nullptr, std::nullopt, format}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration. - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) - { - format = format_from_file_name; - break; - } - } - } - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache, format}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt, format}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + 
elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_azure) - return std::nullopt; - - auto & schema_cache = StorageAzureBlob::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = host_and_bucket + '/' + it->relative_path; - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 7975b42ac02..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,2310 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "Common/logger_useful.h" -#include "IO/CompressionMethod.h" -#include "IO/ReadBuffer.h" -#include "Interpreters/Context_fwd.h" -#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include 
-#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#include -#pragma clang diagnostic pop - -namespace fs = std::filesystem; - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; - extern const int NO_ELEMENTS_IN_CONFIG; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromStorageS3Step( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , column_names(column_names_) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , query_configuration(storage.getConfigurationCopy()) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration.update(context); - virtual_columns = storage.getVirtualsList(); - } - -private: - Names column_names; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr 
iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate_, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , predicate(predicate_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , list_objects_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - expanded_keys = expandSelectionGlob(globbed_uri.key); - expanded_keys_iter = expanded_keys.begin(); - - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - bool hasMore() - { - if (buffer.empty()) - return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); - else - return true; - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - void fillBufferForKey(const std::string & uri_key) - { - is_finished_for_key = false; - const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == uri_key.size()) - { - buffer.clear(); - buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); - buffer_iter = buffer.begin(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - is_finished_for_key = true; - return; - } - - request = {}; - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(uri_key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", uri_key, matcher->error()); - - recursive = globbed_uri.key == "/**"; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr nextAssumeLocked() - { - do - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - try - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - } - catch (...) 
- { - /// if no such file AND there was no `{}` glob -- this is an exception - /// otherwise ignore it, this is acceptable - if (expanded_keys.size() == 1) - throw; - continue; - } - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished_for_key) - { - if (expanded_keys_iter != expanded_keys.end()) - { - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - continue; - } - else - return {}; - } - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished_for_key = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - } while (true); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished_for_key = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished_for_key) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
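The comment above is the reason the pool is drained before the next listing request is scheduled: the pool has a single thread, so at most one ListObjectsV2 call is ever in flight. A rough sketch of the same one-request-at-a-time prefetch pattern, with std::async standing in for the internal thread pool and listNextBatch as a placeholder for one listing page:

#include <future>
#include <string>
#include <vector>

/// Placeholder for one page of the paginated listing call.
std::vector<std::string> listNextBatch()
{
    return {"key1", "key2"};
}

int main()
{
    /// Schedule the first batch asynchronously.
    auto outcome_future = std::async(std::launch::async, listNextBatch);

    for (int i = 0; i < 3; ++i)
    {
        /// Consume the batch that was prefetched on the previous iteration...
        std::vector<std::string> batch = outcome_future.get();
        (void)batch; /// process the batch here

        /// ...and only then schedule the next one, so that at most one
        /// listing request is in flight at any time.
        outcome_future = std::async(std::launch::async, listNextBatch);
    }
}

std::future::get already guarantees the previous task has finished; the real pool additionally needs the explicit wait() because, as the comment says, a finished task may still occupy its thread.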
- list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. - if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::vector expanded_keys; - std::vector::iterator expanded_keys_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - const ActionsDAG::Node * predicate; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished_for_key{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - if (pimpl->hasMore()) - { - /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); - /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - 
/// as it would lead to serious slow down of the execution, since objects are going - /// to be fetched sequentially rather than in-parallel with up to times. - return std::numeric_limits::max(); - } - else - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get())); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - while (current_index < buffer.size()) - { - if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) - return buffer[current_index]; - - current_index = index.fetch_add(1, std::memory_order_relaxed); - } - - return nullptr; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - - 
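In distributed processing mode the keys are not discovered locally at all: the initiator hands them out through a read-task callback, and the iterator above merely pre-fetches a fixed number of tasks in parallel before falling back to direct callback calls (empty keys in the buffer are skipped). A condensed sketch of that scheme, assuming the callback and the prefetch count come from the caller; ReadTaskBuffer is an illustrative name, not the real class:

#include <atomic>
#include <functional>
#include <future>
#include <string>
#include <vector>

class ReadTaskBuffer
{
public:
    ReadTaskBuffer(std::function<std::string()> callback_, size_t prefetch_count)
        : callback(std::move(callback_))
    {
        /// Ask the initiator for several tasks concurrently up front.
        std::vector<std::future<std::string>> futures;
        futures.reserve(prefetch_count);
        for (size_t i = 0; i < prefetch_count; ++i)
            futures.push_back(std::async(std::launch::async, callback));

        buffer.reserve(prefetch_count);
        for (auto & future : futures)
            buffer.push_back(future.get());
    }

    std::string next()
    {
        size_t current = index.fetch_add(1, std::memory_order_relaxed);
        if (current < buffer.size())
            return buffer[current];
        /// Buffer exhausted: fall back to asking the initiator directly.
        return callback();
    }

private:
    std::function<std::string()> callback;
    std::vector<std::string> buffer;
    std::atomic<size_t> index{0};
};

int main()
{
    std::atomic<int> counter{0};
    std::function<std::string()> callback
        = [&counter] { return "key_" + std::to_string(counter.fetch_add(1)); };

    ReadTaskBuffer tasks(callback, 4);
    std::string first = tasks.next();
    (void)first;
}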
-StorageS3Source::ArchiveIterator::ArchiveIterator( - std::unique_ptr basic_iterator_, - const std::string & archive_pattern_, - std::shared_ptr client_, - const String & bucket_, - const String & version_id_, - const S3Settings::RequestSettings & request_settings_, - ContextPtr context_, - KeysWithInfo * read_keys_) - : WithContext(context_) - , basic_iterator(std::move(basic_iterator_)) - , basic_key_with_info_ptr(nullptr) - , client(client_) - , bucket(bucket_) - , version_id(version_id_) - , request_settings(request_settings_) - , read_keys(read_keys_) -{ - if (archive_pattern_.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern_)); - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error()); - filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }}; - } - else - { - path_in_archive = archive_pattern_; - } -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t) -{ - if (!path_in_archive.empty()) - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - bool file_exists = archive_reader->fileExists(path_in_archive); - if (file_exists) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } - else - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - if (!file_enumerator) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - file_enumerator = archive_reader->firstFile(); - if (!file_enumerator) - { - file_enumerator.reset(); - continue; - } - } - else if (!file_enumerator->nextFile()) - { - file_enumerator.reset(); - continue; - } - - String current_filename = file_enumerator->getFileName(); - bool satisfies = filter(current_filename); - if (satisfies) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } -} - -size_t StorageS3Source::ArchiveIterator::estimatedKeysCount() -{ - return basic_iterator->estimatedKeysCount(); -} - -void StorageS3Source::ArchiveIterator::refreshArchiveReader() -{ - if (basic_key_with_info_ptr) - { - if (!basic_key_with_info_ptr->info) - { - basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings); - } - archive_reader = createArchiveReader( - basic_key_with_info_ptr->key, - [key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]() - { return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); }, - basic_key_with_info_ptr->info.value().size); - } - else - { - archive_reader = nullptr; - } -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const 
S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
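When the row count is served from the cache it is still streamed out in blocks of at most max_block_size rows, for the reason given in the comment above. The splitting is plain arithmetic; a self-contained sketch (splitIntoBlocks is an illustrative helper, not the ConstChunkGenerator itself):

#include <algorithm>
#include <cstddef>
#include <vector>

/// Split a known total row count into block sizes of at most max_block_size rows,
/// which is how a cached count can be streamed without materializing one giant chunk.
std::vector<size_t> splitIntoBlocks(size_t total_rows, size_t max_block_size)
{
    std::vector<size_t> block_sizes;
    for (size_t emitted = 0; emitted < total_rows;)
    {
        size_t rows = std::min(max_block_size, total_rows - emitted);
        block_sizes.push_back(rows);
        emitted += rows;
    }
    return block_sizes;
}

int main()
{
    /// E.g. 250000 cached rows with max_block_size = 65536 -> 65536, 65536, 65536, 53392.
    auto blocks = splitIntoBlocks(250000, 65536);
    (void)blocks;
}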
- source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = CompressionMethod::None; - if (!key_with_info->path_in_archive.has_value()) - { - compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer( - key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings); - } - else - { - compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint); - read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr createS3ReadBuffer( - const String & key, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_settings = context->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = context->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - static LoggerPtr log = getLogger("StorageS3Source"); - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
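/// A rough standalone sketch of the buffer-choice heuristic described above; the names here
/// are illustrative stand-ins, not the real ClickHouse types. Objects that fit into roughly
/// two download buffers go through the prefetching asynchronous reader, while larger objects
/// go through the plain ranged reader so they can benefit from parallel reads.
#include <cstddef>

enum class S3ReadStrategy { AsyncWithPrefetch, PlainRanged };

S3ReadStrategy chooseS3ReadStrategy(size_t object_size, size_t download_buffer_size, bool threadpool_reads_enabled)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    return (object_too_small && threadpool_reads_enabled)
        ? S3ReadStrategy::AsyncWithPrefetch
        : S3ReadStrategy::PlainRanged;
}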
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings); - } - - - return std::make_unique( - client_ptr, - bucket, - key, - version_id, - request_settings, - read_settings, - /*use_external_buffer*/ false, - /*offset_*/ 0, - /*read_until_position_*/ 0, - /*restricted_seek_*/ false, - object_size); -} - -std::unique_ptr createAsyncS3ReadBuffer( - const String & key, - const ReadSettings & read_settings, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr - { - return std::make_unique( - client_ptr, - bucket, - object.remote_path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */ true, - /* offset */ 0, - /* read_until_position */ 0, - restricted_seek, - object_size); - }; - - auto modified_settings{read_settings}; - /// User's S3 object may change, don't cache it. - modified_settings.use_page_cache_for_disks_without_file_cache = false; - - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - "", - read_settings, - /* cache_log */ nullptr, - /* use_external_buffer */ true); - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - String file_name = reader.getFile(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? (&file_name) : nullptr); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getPath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket_with_key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional { return key_with_info.info->last_modification_time; }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - -std::optional checkAndGetNewFileOnInsertIfNeeded( - const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) -{ - if (context->getSettingsRef().s3_truncate_on_insert - || !S3::objectExists( - *configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) - { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } while (S3::objectExists( - *configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); -} -} - - -class PartitionedStorageS3Sink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) - , WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */ 1)) - partition_key = *new_key; - - return std::make_shared( - format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = getTableStructureFromData(configuration, format_settings, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -static std::shared_ptr createFileIterator( - StorageS3::Configuration configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3Source::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared( - local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else - { - auto basic_iterator = [&]() -> std::unique_ptr - { - StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? 
nullptr : read_keys; - if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_unique( - *configuration.client, - configuration.url, - predicate, - virtual_columns, - local_context, - local_read_keys, - configuration.request_settings, - file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - return std::make_unique( - *configuration.client, - configuration.url.version_id, - keys, - configuration.url.bucket, - configuration.request_settings, - local_read_keys, - file_progress_callback); - } - }(); - if (configuration.url.archive_pattern.has_value()) - { - return std::make_shared( - std::move(basic_iterator), - configuration.url.archive_pattern.value(), - configuration.client, - configuration.url.bucket, - configuration.url.version_id, - configuration.request_settings, - local_context, - read_keys); - } - else - { - return basic_iterator; - } - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy()); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - updateConfiguration(local_context); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - *this, - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - storage.getConfigurationCopy(), - storage.distributed_processing, - context, - predicate, - virtual_columns, - nullptr, - context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if 
(storage.partition_by && query_configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - { - /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. - num_streams = 1; - } - - const size_t max_threads = context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write( - const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - auto key = query_configuration.keys.front(); - - if (query_configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } - else - { - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size())) - { - std::lock_guard lock{configuration_update_mutex}; - query_configuration.keys.push_back(*new_key); - configuration.keys.push_back(*new_key); - key = *new_key; - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -StorageS3::Configuration StorageS3::getConfigurationCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -String StorageS3::getFormatCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration.format; -} - -bool StorageS3::Configuration::update(const ContextPtr & context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(const ContextPtr & context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - /// seems as we don't use it - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; - client_configuration.http_keep_alive_timeout = S3::DEFAULT_KEEP_ALIVE_TIMEOUT; - client_configuration.http_keep_alive_max_requests = S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = 
request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), - }; - - auto credentials - = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); -} - -bool StorageS3::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds - = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method - = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 
'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 6 positional arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
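/// A simplified sketch of the disambiguation rule applied to the second positional argument,
/// with the format registry stubbed out as a plain set (the real code consults
/// FormatFactory::instance().exists): the argument is either the literal NOSIGN, a known
/// format name (or "auto"), or the beginning of a credentials pair.
#include <string>
#include <unordered_set>
#include <boost/algorithm/string/predicate.hpp>

enum class SecondArgKind { NoSign, Format, AccessKeyId };

SecondArgKind classifySecondArg(const std::string & arg, const std::unordered_set<std::string> & known_formats)
{
    if (boost::iequals(arg, "NOSIGN"))
        return SecondArgKind::NoSign;
    if (arg == "auto" || known_formats.contains(arg))
        return SecondArgKind::Format;
    return SecondArgKind::AccessKeyId;
}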
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - else if (count == 6) - { - engine_args_to_idx - = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration - = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - { - if (configuration.url.archive_pattern.has_value()) - { - configuration.format = FormatFactory::instance() - .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) - .value_or("auto"); - } - else - { - configuration.format - = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); - } - } - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration_.format, 
configuration_, format_settings_, ctx).first; -} - -std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); -} - -class ReadBufferIterator : public IReadBufferIterator, WithContext -{ -public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - ContextPtr context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key_with_info : read_keys) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
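/// A condensed sketch of the two cache-probing scopes used in this iterator (hypothetical
/// helper, index ranges instead of key iterators): in DEFAULT mode one inferred schema is
/// shared, so a whole batch of newly listed keys is probed together; in UNION mode each file
/// keeps its own schema, so only the current key is probed.
#include <cstddef>
#include <utility>

enum class SchemaInferenceMode { Default, Union };

/// Returns the half-open [begin, end) range of key indices whose cached schema should be probed.
std::pair<size_t, size_t> cacheProbeRange(
    SchemaInferenceMode mode, size_t batch_begin, size_t batch_end, size_t current_key_index)
{
    if (mode == SchemaInferenceMode::Union)
        return {current_key_index, current_key_index + 1};
    return {batch_begin, batch_end};
}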
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3Source::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - std::unique_ptr impl; - - if (!current_key_with_info->path_in_archive.has_value()) - { - impl = std::make_unique( - configuration.client, - configuration.url.bucket, - current_key_with_info->key, - configuration.url.version_id, - configuration.request_settings, - getContext()->getReadSettings()); - } - else - { - assert(current_key_with_info->archive_reader); - impl = current_key_with_info->archive_reader->readFile( - current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return { - wrapReadBufferWithCompressionMethod( - std::move(impl), - current_key_with_info->path_in_archive.has_value() - ? chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method) - : chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), - zstd_window_log_max), - std::nullopt, - format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), - read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) { return host_and_bucket / elem->getPath(); }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->getPath(); - return ""; - } - - bool supportsLastReadBufferRecreation() const override { 
return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); - } - -private: - std::optional tryGetColumnsFromCache( - const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - String path = fs::path(configuration.url.bucket) / (*it)->getPath(); - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry fcreateor some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
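/// A minimal sketch of the fallback lookup above, with the schema cache reduced to a map
/// keyed by (source, format) and the list of input formats passed in explicitly (these are
/// stand-ins, not the real SchemaCache API): once any format yields a cached schema, that
/// format is fixed for all remaining files.
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using CacheKey = std::pair<std::string /* source */, std::string /* format */>;
using Columns = std::vector<std::string>;

std::optional<std::pair<std::string, Columns>> probeCacheForAnyFormat(
    const std::map<CacheKey, Columns> & cache,
    const std::string & source,
    const std::vector<std::string> & all_input_formats)
{
    for (const auto & format_name : all_input_formats)
        if (auto it = cache.find({source, format_name}); it != cache.end())
            return std::make_pair(format_name, it->second); /// the format is known from here on
    return std::nullopt;
}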
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; -}; - -std::pair StorageS3::getTableStructureAndFormatFromDataImpl( - std::optional format, - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
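/// A reduced sketch of the settings-merging order used here, with plain maps standing in for
/// FormatFactorySettings and SettingsChanges: session changes are copied only for names the
/// format layer recognises, then the CREATE ... SETTINGS clause is applied on top (the real
/// code additionally validates the clause and rejects unknown names).
#include <map>
#include <set>
#include <string>

using SettingsChanges = std::map<std::string, std::string>;

SettingsChanges mergeFormatSettings(
    const std::set<std::string> & known_format_settings,
    const SettingsChanges & session_changes,
    const SettingsChanges & create_query_changes)
{
    SettingsChanges result;
    for (const auto & [name, value] : session_changes)
        if (known_format_settings.contains(name)) /// unknown session settings are silently ignored
            result[name] = value;
    for (const auto & [name, value] : create_query_changes)
        result[name] = value; /// the SETTINGS clause takes precedence
    return result;
}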
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - registerStorageS3Impl("S3", factory); - registerStorageS3Impl("COSN", factory); - registerStorageS3Impl("OSS", factory); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} -} - -#endif diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..dadf2f35e6e 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index ba42795333c..6287156daaf 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,6 +16,7 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS +SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index e0cc8f0ce63..2d9f236ada9 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent -18 9 9 9 
-deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data +18 18 18 18 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 88d3165d060..6a155bcda46 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index d2d19b76a1f..ba50902ebea 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) 
settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; system flush logs; -- { echo } From 5f63abfd43e6946ff4f21d261f77e4eeb8b7d7c5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 14:31:11 +0200 Subject: [PATCH 088/439] work with tests --- .../Transforms/CountingTransform.cpp | 3 -- src/Processors/Transforms/CountingTransform.h | 2 -- .../Transforms/SquashingChunksTransform.cpp | 32 +++++++++---------- .../Transforms/buildPushingToViewsChain.cpp | 6 ++-- src/Storages/MergeTree/MergeTreeDataWriter.h | 1 - src/Storages/StorageLog.cpp | 1 + 6 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index c138eed69de..d39c6575292 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -18,9 +18,6 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("CountingTransform"), - "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); - if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index ab8d083fd05..4efcf147ac7 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -45,8 +45,6 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { - LOG_DEBUG(getLogger("CountingTransform"), - "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 2ee13c05b95..531d264a25a 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,8 +17,8 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "onConsume {}", chunk.getNumRows()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "onConsume {}", chunk.getNumRows()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); @@ -37,14 +37,14 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunkinfos = {}; } - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + assert(result.input_block_delayed); cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -90,10 +90,10 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - 
LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform rows {}, size {}, columns {}, infos: {}/{}", - chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "transform rows {}, size {}, columns {}, infos: {}/{}", + // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + // chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); @@ -111,14 +111,14 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_info = {}; } - LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + // squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + chassert(result.input_block_delayed); squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index d44796610ed..996fe3efdc5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,10 +552,8 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index a9a44813545..863c951d957 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,7 +47,6 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. 
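The assertion changes above (assert(!result.input_block_delayed) becoming assert/chassert(result.input_block_delayed)) rely on a simple contract: when the squashing helper returns no squashed block, it is because the incoming block was buffered ("delayed"), so the caller has to stash the chunk infos until a block is finally emitted. A minimal self-contained sketch of that contract follows; the names (SketchSquashing, AddResult, SketchBlock) are hypothetical stand-ins loosely modelled on the hunks above, not the actual ClickHouse interface.

#include <cassert>
#include <cstddef>
#include <optional>
#include <utility>
#include <vector>

/// Illustrative stand-ins; the real Squashing/Chunk classes are richer.
struct SketchBlock { std::vector<int> rows; };

struct AddResult
{
    std::optional<SketchBlock> block;   /// set when enough rows were accumulated
    bool input_block_delayed = false;   /// true when the input stayed in the buffer
};

class SketchSquashing
{
public:
    explicit SketchSquashing(size_t min_rows_) : min_rows(min_rows_) {}

    AddResult add(SketchBlock input)
    {
        buffer.rows.insert(buffer.rows.end(), input.rows.begin(), input.rows.end());
        AddResult result;
        if (buffer.rows.size() >= min_rows)
        {
            result.block = std::exchange(buffer, SketchBlock{});
            result.input_block_delayed = false;  /// input was folded into the emitted block
        }
        else
        {
            result.input_block_delayed = true;   /// nothing emitted, input is delayed in the buffer
        }
        return result;
    }

private:
    SketchBlock buffer;
    size_t min_rows = 0;
};

int main()
{
    SketchSquashing squashing(/*min_rows=*/4);

    auto first = squashing.add(SketchBlock{{1, 2}});
    assert(!first.block.has_value() && first.input_block_delayed);

    auto second = squashing.add(SketchBlock{{3, 4, 5}});
    assert(second.block.has_value() && !second.input_block_delayed);
    assert(second.block->rows.size() == 5);
    return 0;
}

The same reasoning explains why the else-branches above store the chunk infos: a delayed input contributes its metadata to whichever squashed block eventually carries its rows.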
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 6ef16189335..8b1bf4637b4 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,7 @@ #include #include +#include "Common/logger_useful.h" #include #include #include From 0a2d922d2324e4b3887ccb663f5611079f5dcd6a Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 29 May 2024 13:57:29 +0000 Subject: [PATCH 089/439] Fix help message printing --- programs/disks/CommandRemove.cpp | 3 +- programs/disks/CommandSwitchDisk.cpp | 7 +- programs/disks/DisksApp.cpp | 72 ++++++-- programs/disks/DisksApp.h | 5 +- programs/disks/DisksClient.cpp | 252 ++++++++++++++++++++++++++ programs/disks/DisksClient.h | 257 ++------------------------- 6 files changed, 332 insertions(+), 264 deletions(-) diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index f332267c780..b322fb2701f 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -10,8 +10,7 @@ public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' " - "or './path' or 'path'"; + description = "Remove file or directory with all children. Throws exception if file doesn't exists"; options_description.add_options()("path", po::value(), "path from which we copy (mandatory, positional)"); positional_options_description.add("path", 1); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index 22d56673832..cbcf8e93bfc 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -7,18 +7,13 @@ namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -}; - class CommandSwitchDisk final : public ICommand { public: explicit CommandSwitchDisk() : ICommand() { command_name = "switch-disk"; - description = "Change disk"; + description = "Switch disk"; options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( "path", po::value(), "the path to switch on the disk"); positional_options_description.add("disk", 1); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index a990d85a9d1..3df6776f4ec 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -44,7 +44,7 @@ CommandPtr DisksApp::getCommandByName(String command) const } catch (...) 
{ - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command {} is unknown", command); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); } } @@ -199,6 +199,10 @@ bool DisksApp::processQueryText(String text) std::cerr << "COMMAND: " << command->command_name << "\n"; std::cerr << command->options_description << "\n"; } + else + { + printAvailableCommandsHelpMessage(); + } } else { @@ -282,34 +286,70 @@ void DisksApp::processOptions() config().setString("log-level", options["log-level"].as()); } -void DisksApp::printHelpMessage(const ProgramOptionsDescription &) + +void DisksApp::printEntryHelpMessage() { - std::optional help_description - = createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); - - help_description->add(options_description); - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; + std::cout << options_description << '\n'; +} +size_t DisksApp::getMagicConstant() +{ + size_t magic_constant = 0; for (const auto & [current_command, _] : command_descriptions) { - std::cout << command_descriptions[current_command]->command_name; + std::string command_string{}; + command_string += command_descriptions[current_command]->command_name; bool was = false; for (const auto & [alias_name, alias_command_name] : aliases) { if (alias_command_name == current_command) { if (was) - std::cout << ","; + command_string += ","; else - std::cout << "("; - std::cout << alias_name; + command_string += "("; + command_string += alias_name; was = true; } } - std::cout << (was ? ")" : "") << " \t" << command_descriptions[current_command]->description << "\n\n"; + command_string += (was ? ")" : ""); + + magic_constant = std::max(magic_constant, command_string.size()); + } + return magic_constant + 2; +} + +void DisksApp::printAvailableCommandsHelpMessage() +{ + size_t magic_constant = getMagicConstant(); + + std::cout << "\x1b[1;33mAvailable commands:\x1b[0m\n"; + for (const auto & [current_command, _] : command_descriptions) + { + std::string command_string{}; + command_string += command_descriptions[current_command]->command_name; + bool was = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == current_command) + { + if (was) + command_string += ","; + else + command_string += "("; + command_string += alias_name; + was = true; + } + } + command_string += (was ? 
")" : ""); + std::cout << "\x1b[1;32m" << command_string << "\x1b[0m"; + for (size_t i = command_string.size(); i < magic_constant; ++i) + { + std::cout << " "; + } + + std::cout << command_descriptions[current_command]->description << "\n"; } } @@ -347,7 +387,8 @@ void DisksApp::init(const std::vector & common_arguments) if (options.count("help")) { - printHelpMessage(options_description); + printEntryHelpMessage(); + printAvailableCommandsHelpMessage(); exit(0); // NOLINT(concurrency-mt-unsafe) } @@ -373,6 +414,7 @@ int DisksApp::main(const std::vector & /*args*/) } else { + printEntryHelpMessage(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index a0ce98b51d0..7333e5804ae 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -38,12 +38,15 @@ public: static void parseAndCheckOptions( const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); - void printHelpMessage(const ProgramOptionsDescription &); + void printEntryHelpMessage(); + void printAvailableCommandsHelpMessage(); std::vector getCompletions(const String & prefix) const; std::vector getEmptyCompletion(CommandPtr command_) const; + size_t getMagicConstant(); + ~DisksApp() override; private: diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 8e90f0a82c1..509892a39a2 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -13,8 +13,15 @@ #include #include +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +}; + namespace DB { + std::vector split(const String & text, const String & delimiters) { std::vector arguments; @@ -34,5 +41,250 @@ std::vector split(const String & text, const String & delimiters) arguments.push_back({prev, text.end()}); } return arguments; +} + +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) + : disk(disk_) + , path( + [&]() + { + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + return path_.value(); + } + else + { + return String{"/"}; + } + }()) +{ + if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} at disk {} is not a directory", path, disk->getName()); } } + +std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const +{ + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } +} + +std::vector DiskWithPath::getAllFilesByPattern(std::string pattern) const +{ + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; 
+ } +}; + +void DiskWithPath::setPath(const String & any_path) +{ + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } +} + +String DiskWithPath::validatePathAndGetAsRelative(const String & path) +{ + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. + if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; +} + +std::string DiskWithPath::normalizePathAndGetAsRelative(const std::string & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + std::string npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); +} + +std::string DiskWithPath::normalizePath(const std::string & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); +} + +DisksClient::DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) +{ + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = true; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); +} + +const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +DiskWithPath & DisksClient::getDiskWithPath(const String & disk) +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +const DiskWithPath & DisksClient::getCurrentDiskWithPath() const +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +DiskWithPath & DisksClient::getCurrentDiskWithPath() +{ + try + { + return disks.at(current_disk); + } + catch (...) 
+ { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +void DisksClient::switchToDisk(const String & disk_, const std::optional & path_) +{ + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } +} + +std::vector DisksClient::getAllDiskNames() const +{ + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; +} + +std::vector DisksClient::getAllFilesByPatternFromAllDisks(std::string pattern) const +{ + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; +} + +void DisksClient::addDisk(DiskPtr disk_, const std::optional & path_) +{ + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); +} +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index 0fc20125e21..5c2de7aa529 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -12,9 +12,7 @@ #include #include #include -#include "Common/Exception.h" -// #include namespace fs = std::filesystem; namespace DB @@ -25,40 +23,10 @@ std::vector split(const String & text, const String & delimiters); using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; - -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; -}; - class DiskWithPath { public: - explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt) - : disk(disk_) - , path( - [&]() - { - if (path_.has_value()) - { - if (!fs::path{path_.value()}.is_absolute()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); - } - return path_.value(); - } - else - { - return String{"/"}; - } - }()) - { - if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} at disk {} is not a directory", path, disk->getName()); - } - } + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt); String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } @@ -66,106 +34,20 @@ public: bool isDirectory(const String & any_path) const { return disk->isDirectory(getRelativeFromRoot(any_path)); } - std::vector listAllFilesByPath(const String & any_path) const - { - if (isDirectory(any_path)) - { - std::vector file_names; - disk->listFiles(getRelativeFromRoot(any_path), file_names); - return file_names; - } - else - { - return {}; - } - } + std::vector listAllFilesByPath(const String & any_path) const; - std::vector getAllFilesByPattern(std::string pattern) const - { - auto [path_before, path_after] = [&]() -> std::pair - { - auto slash_pos = pattern.find_last_of('/'); - if (slash_pos >= pattern.size()) - { - return {"", pattern}; - } - else - { - return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; - } - }(); - - if (!isDirectory(path_before)) - { - return {}; - } - else - { - std::vector 
file_names = listAllFilesByPath(path_before); - - std::vector answer; - - for (const auto & file_name : file_names) - { - if (file_name.starts_with(path_after)) - { - String file_pattern = path_before + file_name; - if (isDirectory(file_pattern)) - { - file_pattern = file_pattern + "/"; - } - answer.push_back(file_pattern); - } - } - return answer; - } - } + std::vector getAllFilesByPattern(std::string pattern) const; DiskPtr getDisk() const { return disk; } - void setPath(const String & any_path) - { - if (isDirectory(any_path)) - { - path = getAbsolutePath(any_path); - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); - } - } + void setPath(const String & any_path); String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } private: - static String validatePathAndGetAsRelative(const String & path) - { - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. - if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; - } - - static std::string normalizePathAndGetAsRelative(const std::string & messyPath) - { - std::filesystem::path path(messyPath); - std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); - std::string npath = canonical_path.make_preferred().string(); - return validatePathAndGetAsRelative(npath); - } - - static std::string normalizePath(const std::string & messyPath) - { - std::filesystem::path path(messyPath); - std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); - return canonical_path.make_preferred().string(); - } + static String validatePathAndGetAsRelative(const String & path); + static std::string normalizePathAndGetAsRelative(const std::string & messyPath); + static std::string normalizePath(const std::string & messyPath); const DiskPtr disk; String path; @@ -174,134 +56,29 @@ private: class DisksClient { public: - explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) - { - if (disks_with_paths.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); - } - if (!begin_disk.has_value()) - { - begin_disk = disks_with_paths[0].first->getName(); - } - bool has_begin_disk = true; - for (auto & [disk, path] : disks_with_paths) - { - addDisk(disk, path); - if (disk->getName() == begin_disk.value()) - { - has_begin_disk = true; - } - } - if (!has_begin_disk) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); - } - current_disk = std::move(begin_disk.value()); - } + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk); - const DiskWithPath & getDiskWithPath(const String & disk) const - { - try - { - return disks.at(disk); - } - catch (...) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); - } - } + const DiskWithPath & getDiskWithPath(const String & disk) const; - DiskWithPath & getDiskWithPath(const String & disk) - { - try - { - return disks.at(disk); - } - catch (...) 
- { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); - } - } + DiskWithPath & getDiskWithPath(const String & disk); - const DiskWithPath & getCurrentDiskWithPath() const - { - try - { - return disks.at(current_disk); - } - catch (...) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); - } - } + const DiskWithPath & getCurrentDiskWithPath() const; - DiskWithPath & getCurrentDiskWithPath() - { - try - { - return disks.at(current_disk); - } - catch (...) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); - } - } + DiskWithPath & getCurrentDiskWithPath(); DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } - void switchToDisk(const String & disk_, const std::optional & path_) - { - if (disks.contains(disk_)) - { - if (path_.has_value()) - { - disks.at(disk_).setPath(path_.value()); - } - current_disk = disk_; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); - } - } + void switchToDisk(const String & disk_, const std::optional & path_); - std::vector getAllDiskNames() const - { - std::vector answer{}; - answer.reserve(disks.size()); - for (const auto & [disk_name, _] : disks) - { - answer.push_back(disk_name); - } - return answer; - } + std::vector getAllDiskNames() const; + + std::vector getAllFilesByPatternFromAllDisks(std::string pattern) const; - std::vector getAllFilesByPatternFromAllDisks(std::string pattern) const - { - std::vector answer{}; - for (const auto & [_, disk] : disks) - { - for (auto & word : disk.getAllFilesByPattern(pattern)) - { - answer.push_back(word); - } - } - return answer; - } private: - void addDisk(DiskPtr disk_, const std::optional & path_) - { - String disk_name = disk_->getName(); - if (disks.contains(disk_->getName())) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); - } - disks.emplace(disk_name, DiskWithPath{disk_, path_}); - } + void addDisk(DiskPtr disk_, const std::optional & path_); String current_disk; std::unordered_map disks; From 3133b757d797c32fb52be89e7299c5a17a6237c7 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 29 May 2024 14:59:50 +0000 Subject: [PATCH 090/439] Add query for a non-interactive mode --- programs/disks/DisksApp.cpp | 13 +++++++++++-- programs/disks/DisksApp.h | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 3df6776f4ec..bd4c5293c70 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -256,7 +256,7 @@ void DisksApp::addOptions() { options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( - "log-level", po::value(), "Logging level"); + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode"); command_descriptions.emplace("list-disks", makeCommandListDisks()); command_descriptions.emplace("copy", makeCommandCopy()); @@ -284,6 +284,8 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("query")) + query = std::optional{options["query"].as()}; } @@ -468,7 +470,14 @@ int DisksApp::main(const std::vector & 
/*args*/) suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); - runInteractiveReplxx(); + if (!query.has_value()) + { + runInteractiveReplxx(); + } + else + { + processQueryText(query.value()); + } return Application::EXIT_OK; } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 7333e5804ae..fcb4b0b52f5 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -67,6 +67,8 @@ private: CommandLineOptions options; std::unordered_map command_descriptions; + std::optional query; + const std::unordered_map aliases = {{"cp", "copy"}, {"mv", "move"}, From 62c764c2169cd5bed627bd670a5d93fa854c1fa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 17:36:43 +0200 Subject: [PATCH 091/439] work with tests --- tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 06fe156500d..450d92476a9 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,7 +54,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 Now it is fixed. 
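  A minimal illustration of the behaviour described above (hypothetical table name `landing`, not the tables used in this test):

    set deduplicate_blocks_in_dependent_materialized_views = 0;
    insert into landing values (20220901, 10), (20220902, 20);
    insert into landing values (20220901, 10), (20220902, 20); -- retried insert: the block is deduplicated in `landing`
    -- with the setting at 0 the dependent materialized views still receive the retried block,
    -- with the setting at 1 the views perform their own deduplication and skip it as well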
*/ From c25e9ecde35fad2b72f919fbdf54381fa184538f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 13:04:55 +0200 Subject: [PATCH 092/439] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 +++++ src/Processors/Transforms/NumberBlocksTransform.cpp | 8 +++++++- src/Processors/Transforms/NumberBlocksTransform.h | 2 ++ src/Processors/Transforms/buildPushingToViewsChain.cpp | 5 ++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..c166ec81af7 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -16,6 +17,10 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); + + // Add comment here + DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); + cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 11054f652ff..d51fe67c868 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -105,7 +105,7 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::transform(Chunk & chunk) +void SetInitialTokenTransform::setInitialToken(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -127,6 +127,12 @@ void SetInitialTokenTransform::transform(Chunk & chunk) token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + setInitialToken(chunk); +} + void SetUserTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index b4f61eb887c..a2e48d9b548 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -121,6 +121,8 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } void transform(Chunk & chunk) override; + + static void setInitialToken(Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 996fe3efdc5..46ca109fe0f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,8 +552,11 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 75c484a73b582308b5bb0f9f1fac338ccdfa2ee3 Mon Sep 17 00:00:00 2001 From: divanik Date: Thu, 30 May 2024 14:07:25 +0000 Subject: [PATCH 093/439] Corrected build and some integrations tests --- programs/disks/CommandChangeDirectory.cpp | 2 +- programs/disks/CommandList.cpp | 23 +++-- 
programs/disks/CommandSwitchDisk.cpp | 2 +- programs/disks/ICommand.cpp | 2 +- .../test_backup_restore_s3/test.py | 1 + tests/integration/test_disk_types/test.py | 5 +- tests/integration/test_disks_app_func/test.py | 84 +++++++++---------- .../test_endpoint_macro_substitution/test.py | 5 +- tests/integration/test_multiple_disks/test.py | 3 + 9 files changed, 68 insertions(+), 59 deletions(-) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 5e6a08cd3fd..71cdae904e5 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -13,7 +13,7 @@ public: explicit CommandChangeDirectory() : ICommand() { command_name = "cd"; - description = "Change directory"; + description = "Change directory (makes sense only in interactive mode)"; options_description.add_options()("path", po::value(), "the path we want to get to (mandatory, positional)")( "disk", po::value(), "A disk where the path is changed"); positional_options_description.add("path", 1); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index f91f0c6455c..b2361532ceb 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -24,10 +24,10 @@ public: bool recursive = options.count("recursive"); bool show_hidden = options.count("all"); auto disk = client.getCurrentDiskWithPath(); - String path = getValueFromCommandLineOptionsWithDefault(options, "path", ""); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", "."); if (recursive) - listRecursive(disk, disk.getAbsolutePath(path), show_hidden); + listRecursive(disk, path, show_hidden); else list(disk, path, show_hidden); } @@ -49,12 +49,13 @@ private: } } - static void listRecursive(const DiskWithPath & disk, const std::string & absolute_path, bool show_hidden) + static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - std::vector file_names = disk.listAllFilesByPath(absolute_path); + // std::cerr << absolute_path << std::endl; + std::vector file_names = disk.listAllFilesByPath(relative_path); std::vector selected_and_sorted_file_names{}; - std::cout << absolute_path << ":\n"; + std::cout << relative_path << ":\n"; if (!file_names.empty()) { @@ -72,7 +73,17 @@ private: for (const auto & file_name : selected_and_sorted_file_names) { - auto path = absolute_path + "/" + file_name; + auto path = [&]() -> String + { + if (relative_path.ends_with("/")) + { + return relative_path + file_name; + } + else + { + return relative_path + "/" + file_name; + } + }(); if (disk.isDirectory(path)) listRecursive(disk, path, show_hidden); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index cbcf8e93bfc..9e5d443ebeb 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -13,7 +13,7 @@ public: explicit CommandSwitchDisk() : ICommand() { command_name = "switch-disk"; - description = "Switch disk"; + description = "Switch disk (makes sense only in interactive mode)"; options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( "path", po::value(), "the path to switch on the disk"); positional_options_description.add("disk", 1); diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 7a70a61bf6c..41fa281794e 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -32,7 +32,7 @@ void ICommand::execute(const Strings & commands, 
DisksClient & client) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); } - return executeImpl(processCommandLineArguments(commands), client); + executeImpl(processCommandLineArguments(commands), client); } DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 967ed6a221c..8a07041ced9 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -255,6 +255,7 @@ def check_system_tables(backup_query_id=None): ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"), ("disk_s3_plain", "ObjectStorage", "S3", "Plain"), ("disk_s3_restricted_user", "ObjectStorage", "S3", "Local"), + ("local", "Local", "None", "None"), ) assert len(expected_disks) == len(disks) for expected_disk in expected_disks: diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 1cc5048eb69..609a3168e77 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -6,6 +6,7 @@ disk_types = { "default": "Local", "disk_s3": "S3", "disk_encrypted": "S3", + "local": "Local" } # do not test HDFS on ARM @@ -95,9 +96,9 @@ def test_select_by_type(cluster): if disk_type == "Local": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + disk_type + "'" + "SELECT name FROM system.disks WHERE type='" + disk_type + "' ORDER BY name" ) - == name + "\n" + == "default\nlocal\n" ) elif disk_type == "S3": assert ( diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 97d5da787cd..dc2a538f64a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -47,12 +47,12 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = out.split("\n") + disks = list(sorted(map(lambda x : x.split(':')[0], filter(lambda x : len(x) > 1, out.split("\n"))))) - assert disks[0] == "default" and disks[1] == "test1" and disks[2] == "test2" + assert disks[:4] == ["default", "local", "test1", "test2"] def test_disks_app_func_ls(started_cluster): @@ -61,7 +61,7 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] ) files = out.split("\n") @@ -75,9 +75,8 @@ def test_disks_app_func_ls(started_cluster): "--save-logs", "--disk", "test1", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) @@ -102,8 +101,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "write", - "path1", + "--query", + "'write path1'", ] ), ] @@ -113,18 +112,13 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "copy", - "--disk-from", - "test1", - "--disk-to", - "test2", - ".", - ".", + "--query", + "copy --disk-from test1 --disk-to test2 . 
.", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] ) assert "path1" in out @@ -136,8 +130,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path1", + "--query", + "remove path1", ] ) @@ -148,21 +142,21 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "remove", - "path1", + "--query", + "remove path1", ] ) # alesapin: Why we need list one more time? # kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] ) assert "path1" not in out @@ -177,14 +171,13 @@ def test_disks_app_func_ln(started_cluster): [ "/usr/bin/clickhouse", "disks", - "link", - "data/default/test_table", - "data/default/z_tester", + "--query", + "link data/default/test_table data/default/z_tester", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list data/default/"] ) files = out.split("\n") @@ -209,15 +202,15 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "write", - "path3", + "--query", + "'write path3'", ] ), ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] ) assert "path3" in out @@ -229,13 +222,13 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path3", + "--query", + "remove path3", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] ) assert "path3" not in out @@ -247,7 +240,8 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", + "--query", "list ."] ) files = out.split("\n") @@ -260,14 +254,13 @@ def test_disks_app_func_mv(started_cluster): "disks", "--disk", "test1", - "move", - "store", - "old_store", + "--query", + "move store old_store", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] ) files = out.split("\n") @@ -290,8 +283,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "write", - "5.txt", + "--query", + "'write 5.txt'", ] ), ] @@ -304,8 +297,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "read", - "5.txt", + "--query", + "read 5.txt", ] ) @@ -319,7 +312,7 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", 
"disks", "--save-logs", "--disk", "test3", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "--query", "list ."] ) files = out.split("\n") @@ -333,9 +326,8 @@ def test_remote_disk_list(started_cluster): "--save-logs", "--disk", "test3", - "list", - ".", - "--recursive", + "--query", + "list . --recursive" ] ) diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index 7dc282a980f..46d7aad4672 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -8,6 +8,7 @@ disk_types = { "disk_s3": "S3", "disk_hdfs": "HDFS", "disk_encrypted": "S3", + "local": "Local" } if is_arm(): @@ -74,9 +75,9 @@ def test_select_by_type(cluster): if disk_type == "Local": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + disk_type + "'" + "SELECT name FROM system.disks WHERE type='" + disk_type + "' ORDER BY name" ) - == name + "\n" + == "default\nlocal\n" ) elif disk_type == "S3": assert ( diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fdd81284b2a..83318c5efb0 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -83,6 +83,9 @@ def test_system_tables(start_cluster): "path": "/external/", "keep_free_space": "0", }, + { + "name": "local", "path": "/", "keep_free_space": "0" + } ] click_disk_data = json.loads( From 59a97713b06f1bb2ffd24087114dbff5a0eecee8 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 16:37:59 +0200 Subject: [PATCH 094/439] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 ----- src/Processors/Transforms/buildPushingToViewsChain.cpp | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index c166ec81af7..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB @@ -17,10 +16,6 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - - // Add comment here - DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); - cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 9dc9531b7a1..7a32b6ff038 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -530,6 +530,8 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -552,11 +554,6 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 5fe2249300d2d5951329a39a49322bbd99cce614 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 
May 2024 14:43:35 +0200 Subject: [PATCH 095/439] adjust tesy test_force_deduplication --- .../test_force_deduplication/test.py | 73 ++++++++++++++----- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 87b2c45bbc5..14c11bc8500 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,6 +29,8 @@ def get_counts(): def test_basic(start_cluster): + old_src, old_a, old_b, old_c = 0, 0, 0, 0 + node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -39,6 +41,15 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) + + src, a, b, c = get_counts() + assert src == old_src + 1 + assert a == old_a + 2 + assert b == old_b + 2 + assert c == old_c + 2 + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -46,22 +57,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c - old_src, old_a, old_b, old_c = get_counts() - # number of rows in test_mv_a and test_mv_c depends on order of inserts into views - assert old_src == 11 - assert old_a in (1, 11) - assert old_b == 1 - assert old_c in (1, 11) - + # deduplication only for src table node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - # no changes because of deduplication in source table assert src == old_src - assert a == old_a - assert b == old_b - assert c == old_c + assert a == old_a + 10 + assert b == old_b + 10 + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; @@ -69,11 +81,27 @@ def test_basic(start_cluster): """ ) src, a, b, c = get_counts() - assert src == 11 - assert a == old_a + 10 # first insert could be succesfull with disabled dedup - assert b == 11 + assert src == old_src + assert a == old_a + 10 + assert b == old_b + 10 assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all the tables + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == old_src + assert a == old_a + assert b == old_b + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated with pytest.raises(QueryRuntimeException): node.query( """ @@ -82,16 +110,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all tables, except test_mv_b. 
For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == 21 - assert a == old_a + 20 - assert b == 21 - assert c == old_c + 20 + assert src == old_src + assert a == old_a + assert b == old_b + 10 + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c From c3f72f0cf9180397359136941c7247a812576c61 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 14:44:55 +0200 Subject: [PATCH 096/439] revert changes at helpers/s3_mocks/broken_s3.py --- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 566d4739eb0..686abc76bdf 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -246,7 +246,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - request_handler.rfile.read(50) + self.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 439ac99897a8920c6d28c51318f8417a9dba445f Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 31 May 2024 13:10:42 +0000 Subject: [PATCH 097/439] Resolve several issues --- programs/disks/CommandList.cpp | 1 - programs/disks/DisksApp.cpp | 90 +++++++++++++--------------------- programs/disks/DisksApp.h | 15 +++--- programs/disks/DisksClient.cpp | 62 ++++++----------------- programs/disks/DisksClient.h | 4 +- programs/disks/ICommand.h | 10 +--- 6 files changed, 57 insertions(+), 125 deletions(-) diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index b2361532ceb..c21941c42ca 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -51,7 +51,6 @@ private: static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - // std::cerr << absolute_path << std::endl; std::vector file_names = disk.listAllFilesByPath(relative_path); std::vector selected_and_sorted_file_names{}; diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index bd4c5293c70..4c1d98ec791 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -1,13 +1,9 @@ #include "DisksApp.h" #include #include -#include -#include #include "Common/Exception.h" +#include "Common/filesystemHelpers.h" #include -#include -#include -#include #include "DisksClient.h" #include "ICommand.h" @@ -30,19 +26,20 @@ extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; }; +LineReader::Patterns DisksApp::query_extenders = {"\\"}; +LineReader::Patterns DisksApp::query_delimiters = {""}; +String DisksApp::word_break_characters = " \t\v\f\a\b\r\n"; -CommandPtr DisksApp::getCommandByName(String command) const +CommandPtr DisksApp::getCommandByName(const String & command) const { - auto it = aliases.find(command); - if (it != aliases.end()) - { - command = it->second; - } try { + if (auto it = aliases.find(command); it != aliases.end()) + return command_descriptions.at(it->second); + return command_descriptions.at(command); } - catch (...) 
+ catch (std::out_of_range &) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); } @@ -75,7 +72,7 @@ std::vector DisksApp::getEmptyCompletion(CommandPtr command_) const std::vector DisksApp::getCompletions(const String & prefix) const { - auto arguments = split(prefix, word_break_characters); + auto arguments = po::split_unix(prefix, word_break_characters); if (arguments.empty()) { return {}; @@ -171,14 +168,14 @@ std::vector DisksApp::getCompletions(const String & prefix) const } } -bool DisksApp::processQueryText(String text) +bool DisksApp::processQueryText(const String & text) { if (exit_strings.find(text) != exit_strings.end()) return false; CommandPtr command; try { - auto arguments = split(text, word_break_characters); + auto arguments = po::split_unix(text, word_break_characters); command = getCommandByName(arguments[0]); arguments.erase(arguments.begin()); command->execute(arguments, *client); @@ -188,7 +185,7 @@ bool DisksApp::processQueryText(String text) int code = getCurrentExceptionCode(); if (code == ErrorCodes::LOGICAL_ERROR) { - throw std::move(err); + throw err; } else if (code == ErrorCodes::BAD_ARGUMENTS) { @@ -272,6 +269,13 @@ void DisksApp::addOptions() #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif + for (const auto & [command_name, command_ptr] : command_descriptions) + { + if (command_name != command_ptr->command_name) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name"); + } + } } void DisksApp::processOptions() @@ -295,63 +299,35 @@ void DisksApp::printEntryHelpMessage() std::cout << options_description << '\n'; } -size_t DisksApp::getMagicConstant() -{ - size_t magic_constant = 0; - for (const auto & [current_command, _] : command_descriptions) - { - std::string command_string{}; - command_string += command_descriptions[current_command]->command_name; - bool was = false; - for (const auto & [alias_name, alias_command_name] : aliases) - { - if (alias_command_name == current_command) - { - if (was) - command_string += ","; - else - command_string += "("; - command_string += alias_name; - was = true; - } - } - command_string += (was ? ")" : ""); - - magic_constant = std::max(magic_constant, command_string.size()); - } - return magic_constant + 2; -} void DisksApp::printAvailableCommandsHelpMessage() { - size_t magic_constant = getMagicConstant(); - std::cout << "\x1b[1;33mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descrtiptions{}; + size_t maximal_command_length = 0; for (const auto & [current_command, _] : command_descriptions) { - std::string command_string{}; - command_string += command_descriptions[current_command]->command_name; - bool was = false; + std::string command_string = command_descriptions[current_command]->command_name; + bool need_comma = false; for (const auto & [alias_name, alias_command_name] : aliases) { if (alias_command_name == current_command) { - if (was) + if (std::exchange(need_comma, true)) command_string += ","; else command_string += "("; command_string += alias_name; - was = true; } } - command_string += (was ? ")" : ""); - std::cout << "\x1b[1;32m" << command_string << "\x1b[0m"; - for (size_t i = command_string.size(); i < magic_constant; ++i) - { - std::cout << " "; - } - - std::cout << command_descriptions[current_command]->description << "\n"; + command_string += (need_comma ? 
")" : ""); + maximal_command_length = std::max(maximal_command_length, command_string.size()); + commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions[current_command]->command_name}); + } + for (const auto & [command_with_aliases, description] : commands_with_aliases_and_descrtiptions) + { + std::cout << "\x1b[1;32m" << command_with_aliases << "\x1b[0m" + << std::string(maximal_command_length + 2 - command_with_aliases.size(), ' ') << description << "\n"; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index fcb4b0b52f5..fad597335f0 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -25,13 +25,13 @@ public: void processOptions(); - bool processQueryText(String text); + bool processQueryText(const String & text); void init(const std::vector & common_arguments); int main(const std::vector & /*args*/) override; - CommandPtr getCommandByName(String command) const; + CommandPtr getCommandByName(const String & command) const; void initializeHistoryFile(); @@ -45,8 +45,6 @@ public: std::vector getEmptyCompletion(CommandPtr command_) const; - size_t getMagicConstant(); - ~DisksApp() override; private: @@ -57,11 +55,11 @@ private: // Fields responsible for the REPL work String history_file; LineReader::Suggest suggest; - LineReader::Patterns query_extenders = {"\\"}; - LineReader::Patterns query_delimiters = {}; - String word_break_characters{" \t\v\f\a\b\r\n"}; + static LineReader::Patterns query_extenders; + static LineReader::Patterns query_delimiters; + static String word_break_characters; - // General commang line arguments parsing fields + // General command line arguments parsing fields ContextMutablePtr global_context; ProgramOptionsDescription options_description; CommandLineOptions options; @@ -87,5 +85,4 @@ private: std::unique_ptr client{}; }; - } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 509892a39a2..1ebfaf40096 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -1,17 +1,10 @@ #include "DisksClient.h" #include #include -#include -#include -#include -#include -#include -#include - #include +#include #include -#include namespace ErrorCodes { @@ -22,46 +15,20 @@ extern const int LOGICAL_ERROR; namespace DB { -std::vector split(const String & text, const String & delimiters) +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) { - std::vector arguments; - auto prev = text.begin(); - auto pos = std::find_if(text.begin(), text.end(), [&](char x) { return delimiters.contains(x); }); - while (pos != text.end()) + if (path_.has_value()) { - if (pos > prev) + if (!fs::path{path_.value()}.is_absolute()) { - arguments.push_back({prev, pos}); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); } - prev = ++pos; - pos = std::find_if(prev, text.end(), [&](char x) { return delimiters.contains(x); }); + path = path_.value(); } - if (pos > prev) + else { - arguments.push_back({prev, text.end()}); + path = String{"/"}; } - return arguments; -} - -DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) - : disk(disk_) - , path( - [&]() - { - if (path_.has_value()) - { - if (!fs::path{path_.value()}.is_absolute()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); - } - return path_.value(); - } - else - { - return String{"/"}; - } - }()) -{ if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) { throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} at disk {} is not a directory", path, disk->getName()); @@ -82,7 +49,7 @@ std::vector DiskWithPath::listAllFilesByPath(const String & any_path) co } } -std::vector DiskWithPath::getAllFilesByPattern(std::string pattern) const +std::vector DiskWithPath::getAllFilesByPattern(const String & pattern) const { auto [path_before, path_after] = [&]() -> std::pair { @@ -149,17 +116,16 @@ String DiskWithPath::validatePathAndGetAsRelative(const String & path) return lexically_normal_path; } -std::string DiskWithPath::normalizePathAndGetAsRelative(const std::string & messyPath) +String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath) { std::filesystem::path path(messyPath); std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); - std::string npath = canonical_path.make_preferred().string(); + String npath = canonical_path.make_preferred().string(); return validatePathAndGetAsRelative(npath); } -std::string DiskWithPath::normalizePath(const std::string & messyPath) +String DiskWithPath::normalizePath(const String & path) { - std::filesystem::path path(messyPath); std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); return canonical_path.make_preferred().string(); } @@ -174,7 +140,7 @@ DisksClient::DisksClient(std::vector>> { begin_disk = disks_with_paths[0].first->getName(); } - bool has_begin_disk = true; + bool has_begin_disk = false; for (auto & [disk, path] : disks_with_paths) { addDisk(disk, path); @@ -265,7 +231,7 @@ std::vector DisksClient::getAllDiskNames() const return answer; } -std::vector DisksClient::getAllFilesByPatternFromAllDisks(std::string pattern) const +std::vector DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const { std::vector answer{}; for (const auto & [_, disk] : disks) diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index 5c2de7aa529..3320c5f7cef 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -36,7 +36,7 @@ public: std::vector listAllFilesByPath(const String & any_path) const; - std::vector getAllFilesByPattern(std::string pattern) const; + std::vector getAllFilesByPattern(const String & pattern) const; DiskPtr getDisk() const { return disk; } @@ -74,7 +74,7 @@ public: std::vector getAllDiskNames() const; - std::vector getAllFilesByPatternFromAllDisks(std::string pattern) const; + std::vector getAllFilesByPatternFromAllDisks(const String & pattern) const; private: diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index 1e05aefd28b..d726d50ba13 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -4,14 +4,13 @@ #include #include +#include #include -#include #include #include "Common/Exception.h" #include -#include #include #include "DisksApp.h" @@ -46,8 +45,6 @@ public: CommandLineOptions processCommandLineArguments(const Strings & commands); - void exit() { options_parsed = false; } - protected: template static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) @@ -56,7 +53,7 @@ protected: { return options[name].as(); } - catch (...) 
+ catch (boost::bad_any_cast) { throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); } @@ -111,9 +108,6 @@ public: protected: PositionalProgramOptionsDescription positional_options_description; - -private: - bool options_parsed{}; }; DB::CommandPtr makeCommandCopy(); From 4b99a16b881c2c9e48c4f537cf96047a56e8d142 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 31 May 2024 13:17:28 +0000 Subject: [PATCH 098/439] ../../tests --- tests/integration/test_disk_types/test.py | 12 ++- tests/integration/test_disks_app_func/test.py | 101 +++++++++++++++--- .../test_endpoint_macro_substitution/test.py | 6 +- tests/integration/test_multiple_disks/test.py | 4 +- 4 files changed, 101 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 609a3168e77..f8893ef1dbc 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -6,7 +6,7 @@ disk_types = { "default": "Local", "disk_s3": "S3", "disk_encrypted": "S3", - "local": "Local" + "local": "Local", } # do not test HDFS on ARM @@ -20,9 +20,9 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/storage_arm.xml"] - if is_arm() - else ["configs/storage_amd.xml"], + main_configs=( + ["configs/storage_arm.xml"] if is_arm() else ["configs/storage_amd.xml"] + ), with_minio=True, with_hdfs=not is_arm(), ) @@ -96,7 +96,9 @@ def test_select_by_type(cluster): if disk_type == "Local": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + disk_type + "' ORDER BY name" + "SELECT name FROM system.disks WHERE type='" + + disk_type + + "' ORDER BY name" ) == "default\nlocal\n" ) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index dc2a538f64a..d643230d198 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -50,7 +50,13 @@ def test_disks_app_func_ld(started_cluster): ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = list(sorted(map(lambda x : x.split(':')[0], filter(lambda x : len(x) > 1, out.split("\n"))))) + disks = list( + sorted( + map( + lambda x: x.split(":")[0], filter(lambda x: len(x) > 1, out.split("\n")) + ) + ) + ) assert disks[:4] == ["default", "local", "test1", "test2"] @@ -61,7 +67,15 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -118,7 +132,15 @@ def test_disks_app_func_cp(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" in out @@ -150,13 +172,29 @@ def test_disks_app_func_cp(started_cluster): # alesapin: Why we need list one more time? 
# kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) assert "path1" not in out @@ -210,7 +248,15 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" in out @@ -228,7 +274,15 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" not in out @@ -240,8 +294,15 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", - "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -260,7 +321,15 @@ def test_disks_app_func_mv(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -312,7 +381,15 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "--query", "list ."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test3", + "--query", + "list .", + ] ) files = out.split("\n") @@ -327,7 +404,7 @@ def test_remote_disk_list(started_cluster): "--disk", "test3", "--query", - "list . --recursive" + "list . 
--recursive", ] ) diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index 46d7aad4672..16da53c03ed 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -8,7 +8,7 @@ disk_types = { "disk_s3": "S3", "disk_hdfs": "HDFS", "disk_encrypted": "S3", - "local": "Local" + "local": "Local", } if is_arm(): @@ -75,7 +75,9 @@ def test_select_by_type(cluster): if disk_type == "Local": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + disk_type + "' ORDER BY name" + "SELECT name FROM system.disks WHERE type='" + + disk_type + + "' ORDER BY name" ) == "default\nlocal\n" ) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 83318c5efb0..ed29d4f9728 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -83,9 +83,7 @@ def test_system_tables(start_cluster): "path": "/external/", "keep_free_space": "0", }, - { - "name": "local", "path": "/", "keep_free_space": "0" - } + {"name": "local", "path": "/", "keep_free_space": "0"}, ] click_disk_data = json.loads( From 2b3e1920ebfe32e99c2833acce357076a7480e40 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 15:22:09 +0200 Subject: [PATCH 099/439] break tests to meet the timeout --- ...uplication_insert_several_blocks.reference | 1922 ----------------- ...008_deduplication_insert_several_blocks.sh | 97 - ...ert_several_blocks_nonreplicated.reference | 962 +++++++++ ...ion_insert_several_blocks_nonreplicated.sh | 58 + ...insert_several_blocks_replicated.reference | 962 +++++++++ ...cation_insert_several_blocks_replicated.sh | 58 + ...tion_mv_generates_several_blocks.reference | 1922 ----------------- ...duplication_mv_generates_several_blocks.sh | 103 - ...tes_several_blocks_nonreplicated.reference | 962 +++++++++ ..._generates_several_blocks_nonreplicated.sh | 58 + ...erates_several_blocks_replicated.reference | 962 +++++++++ ..._mv_generates_several_blocks_replicated.sh | 58 + ...cation_several_mv_into_one_table.reference | 1410 ------------ ...deduplication_several_mv_into_one_table.sh | 111 - ..._mv_into_one_table_nonreplicated.reference | 706 ++++++ ...several_mv_into_one_table_nonreplicated.sh | 58 + ...ral_mv_into_one_table_replicated.reference | 706 ++++++ ...on_several_mv_into_one_table_replicated.sh | 58 + 18 files changed, 5608 insertions(+), 5565 deletions(-) delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference create mode 100755 
tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference deleted file mode 100644 index 641735d1bb6..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree 
use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 
10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 
-OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b 
-count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh deleted file mode 100755 index ed50110b7eb..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# fails, it is a error. Several blocks in scr table with the same user token are processed in parallel and deduplicated - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False" -# fails, it is a error. The same situation as first one, but on dst table. 
- -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..bf900aa84d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even 
+count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 
+table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b 
+count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..c758e2fb3de --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference new file mode 100644 index 00000000000..c815324b455 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 
+0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even 
+count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b 
+count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh new file mode 100755 index 00000000000..45b222b1fc4 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference deleted file mode 100644 index 06f30793670..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b 
-count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 
-table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 
-table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined 
-count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 59: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 
-0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 
-FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 
92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 
-table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 124: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh deleted file mode 100755 index 61996905135..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# failed due to race in multi thread insertion, blocks are deduplicated in different threads - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# the same as first but for dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst table deduplicates all incoming blocks from one insert because not uniq hash - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table 
$deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..76ef4cf6b2c --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True 
+table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 
+table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 
+table_when_b_even_and_joined
+count 47
+0
+0
+table_a_b
+count 10
+table_when_b_even_and_joined
+count 94
+0
+0
+OK
+
+Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
+table_a_b
+count 5
+table_when_b_even_and_joined
+count 45
+0
+0
+table_a_b
+count 10
+table_when_b_even_and_joined
+count 90
+0
+0
+OK
+
+All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh
new file mode 100755
index 00000000000..50cf2a3bb75
--- /dev/null
+++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+ENGINE="MergeTree"
+
+RUN_ONLY=""
+#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True"
+
+i=0
+for insert_method in "InsertSelect" "InsertValues"; do
+    for use_insert_token in "True" "False"; do
+        for single_thread in "True" "False"; do
+            for deduplicate_src_table in "True" "False"; do
+                for deduplicate_dst_table in "True" "False"; do
+                    for insert_unique_blocks in "True" "False"; do
+
+                        THIS_RUN="Test case $i:"
+                        THIS_RUN+=" insert_method=$insert_method"
+                        THIS_RUN+=" engine=$ENGINE"
+                        THIS_RUN+=" use_insert_token=$use_insert_token"
+                        THIS_RUN+=" single_thread=$single_thread"
+                        THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table"
+                        THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table"
+                        THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks"
+
+                        i=$((i+1))
+
+                        echo
+                        if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then
+                            echo "skip $THIS_RUN"
+                            continue
+                        fi
+                        echo "$THIS_RUN"
+
+                        $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq "
+                            $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \
+                                --insert-method $insert_method \
+                                --table-engine $ENGINE \
+                                --use-insert-token $use_insert_token \
+                                --single-thread $single_thread \
+                                --deduplicate-src-table $deduplicate_src_table \
+                                --deduplicate-dst-table $deduplicate_dst_table \
+                                --insert-unique-blocks $insert_unique_blocks \
+                                --get-logs false \
+                            )
+                        " && echo OK || echo FAIL
+                    done
+                done
+            done
+        done
+    done
+done
+
+echo
+echo "All cases executed"
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference
new file mode 100644
index 00000000000..a84539df16b
--- /dev/null
+++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference
@@ -0,0 +1,962 @@
+
+Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
+table_a_b
+count 5
+table_when_b_even_and_joined
+count 47
+0
+0
+table_a_b
+count 5
+table_when_b_even_and_joined
+count 47
+0
+0
+OK
+
+Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
+table_a_b
+count 5
+table_when_b_even_and_joined
+count 45
+0
+0
+table_a_b
+count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True 
+table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + 
+Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 
+0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh new file mode 100755 index 00000000000..2b094e0309e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference deleted file mode 100644 index 4d517948a25..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ /dev/null @@ -1,1410 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src 
count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: 
insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - 
-Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 
8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 
-table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 
2 -0 -0 -FIXED - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh deleted file mode 100755 index 3d2814ed77d..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# race condition on insert into src table - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# race condition on insert into dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst deduplicates blocks from one inserts from different materialized view - -# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# dst deduplicates blocks from different inserts by hash - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" 
deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" - - diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference new file mode 100644 index 00000000000..b6a3e0175a7 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src 
count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: 
insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh new file mode 100755 index 00000000000..33da54b90f1 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference new file mode 100644 index 00000000000..1921103f49e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 
+table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 
8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh new file mode 100755 index 00000000000..290d1f794b2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" From ddde0f5fed1a8d3f57e743f54b2d14dcdaf98908 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 16:25:03 +0200 Subject: [PATCH 100/439] fix headers --- src/Common/CollectionOfDerived.h | 4 +- src/Interpreters/InterpreterInsertQuery.cpp | 50 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 18 +++---- src/Processors/Chunk.h | 6 --- src/Processors/ISimpleTransform.h | 2 - .../Algorithms/ReplacingSortedAlgorithm.h | 3 +- src/Processors/Sinks/SinkToStorage.h | 3 -- .../Transforms/AggregatingInOrderTransform.h | 2 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/CountingTransform.cpp | 6 +-- src/Processors/Transforms/CountingTransform.h | 2 - ...m.cpp => DeduplicationTokenTransforms.cpp} | 12 ++++- ...sform.h => DeduplicationTokenTransforms.h} | 15 +----- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MaterializingTransform.cpp | 2 - ...ergingAggregatedMemoryEfficientTransform.h | 2 +- .../Transforms/SquashingChunksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 7 ++- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 15 +++--- .../MergeTree/ReplicatedMergeTreeSink.cpp | 26 +++++----- src/Storages/StorageDistributed.cpp | 2 - src/Storages/StorageLog.cpp | 3 +- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 24 files changed, 88 insertions(+), 106 deletions(-) rename src/Processors/Transforms/{NumberBlocksTransform.cpp => DeduplicationTokenTransforms.cpp} (91%) rename src/Processors/Transforms/{NumberBlocksTransform.h => DeduplicationTokenTransforms.h} (89%) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index c98e375b4b1..60a91e593f9 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ private: using 
Records = std::vector; public: - void swap(Self & other) + void swap(Self & other) noexcept { records.swap(other.records); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 249c69b51b9..758ac4ab954 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -309,8 +309,8 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - LOG_DEBUG(getLogger("InsertQuery"), - "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + // LOG_DEBUG(getLogger("InsertQuery"), + // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); ThreadStatus * thread_status = current_thread; @@ -413,9 +413,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink sink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink sink_streams table name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,9 +425,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink presink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink presink_streams table name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); @@ -462,8 +462,8 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); if (is_trivial_insert_select) { @@ -511,9 +511,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - LOG_DEBUG(getLogger("InsertQuery"), - "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + // pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); /// Allow to insert Nullable 
into non-Nullable columns, NULL values will be added as defaults values. @@ -743,13 +743,13 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - bool is_table_dist = false; - if (auto * dist_storage = dynamic_cast(table.get())) - { - is_table_dist = true; - LOG_DEBUG(getLogger("InsertQuery"), - "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - } + // bool is_table_dist = false; + // if (auto * dist_storage = dynamic_cast(table.get())) + // { + // is_table_dist = true; + // // LOG_DEBUG(getLogger("InsertQuery"), + // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + // } if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +780,24 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 30c801aaaff..a539870d50c 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,10 +72,10 @@ void SquashingTransform::append(Block && input_block) return; } - LOG_DEBUG(getLogger("SquashingTransform"), - "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - input_block.rows(), input_block.bytes(), input_block.columns(), - accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + // input_block.rows(), input_block.bytes(), input_block.columns(), + // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); assert(blocksHaveEqualStructure(input_block, accumulated_block)); @@ -86,11 +86,11 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - LOG_DEBUG(getLogger("SquashingTransform"), - "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - i, source_column->getName(), - 
acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + // i, source_column->getName(), + // acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); auto mutable_column = IColumn::mutate(std::move(acc_column)); diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index b4345d18a08..1348966c0d3 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,15 +1,9 @@ #pragma once -#include "base/defines.h" - #include #include -#include -#include #include -#include -#include namespace DB { diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index a47e0e49121..629529cdffa 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,8 +2,6 @@ #include -#include - namespace DB { diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index f36e07b8a96..2f23f2a5c4d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include #include #include -#include "Processors/Chunk.h" +#include namespace Poco { diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c350b9f79b0..c728fa87b1e 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,9 +1,6 @@ #pragma once -#include #include -#include #include -#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 6433f862dfd..41a0d7fc7f1 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,7 @@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 430a9a6e50a..95983c39d1e 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,15 +1,14 @@ #pragma once -#include #include #include #include +#include #include #include #include #include #include #include -#include "Processors/Chunk.h" namespace CurrentMetrics { diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index d39c6575292..2c6b3bd8638 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,9 +1,9 @@ - -#include #include + +#include +#include #include #include -#include "IO/Progress.h" namespace ProfileEvents diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 4efcf147ac7..05d8e2aeac8 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp similarity index 91% rename from 
src/Processors/Transforms/NumberBlocksTransform.cpp rename to src/Processors/Transforms/DeduplicationTokenTransforms.cpp index d51fe67c868..ea4537bb5ad 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -18,6 +18,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +void RestoreChunkInfosTransform::transform(Chunk & chunk) +{ + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + + chunk.getChunkInfos().append(chunk_infos.clone()); +} + namespace DeduplicationToken { diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h similarity index 89% rename from src/Processors/Transforms/NumberBlocksTransform.h rename to src/Processors/Transforms/DeduplicationTokenTransforms.h index a2e48d9b548..f0bcc3052f7 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -14,22 +14,11 @@ namespace DB RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) , chunk_infos(std::move(chunk_infos_)) - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", - chunk_infos.size(), chunk_infos.debug()); - } + {} String getName() const override { return "RestoreChunkInfosTransform"; } - void transform(Chunk & chunk) override - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - - chunk.getChunkInfos().append(chunk_infos.clone()); - } + void transform(Chunk & chunk) override; private: Chunk::ChunkInfoCollection chunk_infos; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5fdea2524e2..5f6d9d6fff2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,8 +1,7 @@ #pragma once -#include #include -#include "Processors/Chunk.h" - +#include +#include namespace DB { diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 4a7f5187c75..9ae80e21a68 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 958b43b11ed..3a3c1bd9c1e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,8 +2,8 @@ #include #include -#include "Processors/Chunk.h" #include +#include #include #include #include diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f0334549d4c..860e84f2cd3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -4,7 +4,7 
@@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7a32b6ff038..bef00fa3f1d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -15,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -24,9 +25,7 @@ #include #include #include -#include "Core/Field.h" -#include -#include +#include #include #include diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index b9d29a90f56..dd20bea4dd6 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -22,12 +22,12 @@ limitations under the License. */ #include #include #include +#include #include #include #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" #include #include diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0953cdc5d72..ba81bb7a56d 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,12 +1,13 @@ -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include "Common/Exception.h" -#include -#include "Interpreters/StorageID.h" + +#include namespace ProfileEvents { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 62d30764ca8..16bb9827c6e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,23 +1,25 @@ -#include -#include -#include -#include -#include -#include #include "Common/Exception.h" #include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include + #include +#include + namespace ProfileEvents { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 257c8c312e5..5e03840fa36 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,7 +23,6 @@ #include -#include "Common/logger_useful.h" #include #include #include @@ -107,7 +106,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 8b1bf4637b4..1a84f578cf8 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,7 +1,7 @@ #include #include +#include -#include "Common/logger_useful.h" #include #include #include @@ -22,7 +22,6 @@ #include #include -#include "StorageLogSettings.h" #include #include #include diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4ae91d64023..17ecba2b4a5 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,12 +32,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include From 6dfd226daa8421055f3a1103fa72323c68c71959 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 17:27:13 +0200 Subject: [PATCH 101/439] fix populate 
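Note on the change below: the hunks derive the per-chunk initial deduplication token by hashing every column of the chunk and formatting the 128-bit digest as two decimal halves joined by an underscore. The standalone sketch that follows only illustrates that shape; it uses a toy FNV-style mix instead of ClickHouse's SipHash, and the sample chunk data is made up.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Toy stand-in for the 128-bit column hash: every byte of every cell feeds two
// accumulators, and the result is formatted as "<lo>_<hi>", mirroring the
// "items[0]_items[1]" formatting used by getInitialToken() in the hunks below.
std::string initialTokenSketch(const std::vector<std::vector<std::string>> & columns)
{
    std::uint64_t lo = 0xcbf29ce484222325ULL;   // FNV-1a offset basis
    std::uint64_t hi = 0x9e3779b97f4a7c15ULL;   // arbitrary second seed
    for (const auto & column : columns)
        for (const auto & cell : column)
            for (unsigned char byte : cell)
            {
                lo = (lo ^ byte) * 0x100000001b3ULL;      // FNV-1a step
                hi = (hi ^ byte) * 0x100000001b3ULL + lo; // cheap mixing for the second half
            }
    return std::to_string(lo) + "_" + std::to_string(hi);
}

int main()
{
    // Two "columns" of a hypothetical two-row chunk: (1, 'A'), (2, 'B').
    std::vector<std::vector<std::string>> chunk = {{"1", "2"}, {"A", "B"}};
    std::cout << initialTokenSketch(chunk) << '\n';
    return 0;
}

Because the token is a pure function of the column data, two inserts carrying identical data produce the same token, which is what block-level deduplication relies on.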
--- .../DeduplicationTokenTransforms.cpp | 29 ++++++++++--------- .../Transforms/DeduplicationTokenTransforms.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index ea4537bb5ad..4f822e4aebb 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { + LOG_DEBUG(getLogger("TokenInfo"), + "token: {}, stage: {}, view id: {}", + getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format(":view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -115,7 +118,18 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::setInitialToken(Chunk & chunk) +String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +{ + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); +} + + +void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -129,18 +143,7 @@ void SetInitialTokenTransform::setInitialToken(Chunk & chunk) if (token_info->tokenInitialized()) return; - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); -} - - -void SetInitialTokenTransform::transform(Chunk & chunk) -{ - setInitialToken(chunk); + token_info->setInitialToken(getInitialToken(chunk)); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index f0bcc3052f7..46d355eb487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -111,7 +111,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static void setInitialToken(Chunk & chunk); + static String getInitialToken(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index bef00fa3f1d..b259e803f80 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -529,8 +529,6 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -538,12 +536,25 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + 
result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } + else if (dynamic_cast(storage.get())) + { + auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) From d72fac13ec7d02d35e49be8f799c82c4762b242b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 18:58:05 +0200 Subject: [PATCH 102/439] mark insert block with a set of block ids from all partitions --- .../DeduplicationTokenTransforms.cpp | 26 ++++++-- .../Transforms/DeduplicationTokenTransforms.h | 3 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 ++- ...on_insert_into_partitioned_table.reference | 35 +++++++++++ ...lication_insert_into_partitioned_table.sql | 63 +++++++++++++++++++ 6 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 4f822e4aebb..dba6fc40b11 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -39,15 +39,24 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const result.reserve(getTotalSize()); for (const auto & part : parts) + { + if (!result.empty()) + result.append(":"); result.append(part); + } return result; } -void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); +} + +void DB::DeduplicationToken::TokenInfo::closeInitialToken() +{ + chassert(stage == INITIAL); stage = VIEW_ID; } @@ -61,7 +70,7 @@ void TokenInfo::setUserToken(const String & token) void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format(":source-number-{}", sbn)); + addTokenPart(fmt::format("source-number-{}", sbn)); stage = VIEW_ID; } @@ -71,14 +80,14 @@ void TokenInfo::setViewID(const String & id) "token: {}, stage: {}, view id: {}", getToken(false), stage, id); chassert(stage == VIEW_ID); - addTokenPart(fmt::format(":view-id-{}", id)); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } void TokenInfo::setViewBlockNumber(size_t mvbn) { chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":view-block-{}", mvbn)); + addTokenPart(fmt::format("view-block-{}", mvbn)); stage = VIEW_ID; } @@ -96,10 +105,14 @@ void TokenInfo::addTokenPart(String part) size_t TokenInfo::getTotalSize() const { + if (parts.empty()) + return 0; + size_t size = 0; for (const auto & part : parts) size += 
part.size(); - return size; + + return size + parts.size() - 1; } void CheckTokenTransform::transform(Chunk & chunk) @@ -143,7 +156,8 @@ void SetInitialTokenTransform::transform(Chunk & chunk) if (token_info->tokenInitialized()) return; - token_info->setInitialToken(getInitialToken(chunk)); + token_info->addPieceToInitialToken(getInitialToken(chunk)); + token_info->closeInitialToken(); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 46d355eb487..27bb21dfad1 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,7 +38,8 @@ namespace DeduplicationToken bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void setInitialToken(String part); + void addPieceToInitialToken(String part); + void closeInitialToken(); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); void setViewID(const String & id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ba81bb7a56d..b31e7e6a562 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -147,7 +147,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -194,6 +194,11 @@ void MergeTreeSink::consume(Chunk & chunk) }); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 16bb9827c6e..8cb4095f1e6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -393,7 +393,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -440,6 +440,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference new file mode 100644 index 00000000000..e69cf2be182 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -0,0 +1,35 @@ +no user deduplication token +partitioned_table: +1 A +1 D +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with user deduplication token +partitioned_table: +1 A +1 A +1 D +2 B +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with incorrect ussage of user deduplication token +partitioned_table: +1 A +2 B +mv_table: +1 A +2 B diff 
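With this change the token becomes a list of pieces (one block-id hash per written partition, then the view-related parts) joined by ':', and getTotalSize() has to count the separators as well as the pieces. A minimal standalone sketch of that separator accounting, with made-up piece names:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

// Join token pieces with ':' after reserving the exact final size: the sum of
// the piece lengths plus one separator between every pair of pieces
// (parts.size() - 1), which is what the updated getTotalSize() accounts for.
std::string joinTokenParts(const std::vector<std::string> & parts)
{
    if (parts.empty())
        return {};

    std::size_t total = parts.size() - 1;
    for (const auto & part : parts)
        total += part.size();

    std::string token;
    token.reserve(total);
    for (const auto & part : parts)
    {
        if (!token.empty())
            token.append(":");
        token.append(part);
    }
    assert(token.size() == total); // holds as long as the pieces are non-empty, as here
    return token;
}

int main()
{
    // Hypothetical pieces: one block-id hash per written partition, then the view id.
    std::cout << joinTokenParts({"part-0-hash", "part-1-hash", "view-id-mv_table"}) << '\n';
    return 0;
}

Previously each piece carried its own leading ':' and the total was just the sum of piece sizes; once the separator is added at join time, the extra parts.size() - 1 bytes have to be reserved up front.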
--git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql new file mode 100644 index 00000000000..918b7f2553d --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS partitioned_table; +DROP TABLE IF EXISTS mv_table; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is not deduplicated because different tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because different tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with incorrect ussage of user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated because equal tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is deduplicated because equal tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; From dbc07ec573d3310b4f5019b8887fd34288bf23cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 20:28:38 +0200 Subject: [PATCH 103/439] adjust tests --- ...02912_ingestion_mv_deduplication.reference | 2 +- .../02912_ingestion_mv_deduplication.sql | 2 +- ...on_insert_into_partitioned_table.reference | 12 ++--- ...lication_insert_into_partitioned_table.sql | 44 ++++++++++++++----- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index ae82b9c0463..07deb7c2565 100644 --- 
a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -17,7 +17,7 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 450d92476a9..a2378fd8f67 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -98,7 +98,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables + - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference index e69cf2be182..c82a6eaa213 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -1,10 +1,10 @@ no user deduplication token -partitioned_table: +partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: 1 A 1 D 2 B 2 C -mv_table: +mv_table is not deduplicated because the inserted blocks was different: 1 A 1 A 1 D @@ -12,14 +12,14 @@ mv_table: 2 B 2 C with user deduplication token -partitioned_table: +partitioned_table is not deduplicated because different tokens: 1 A 1 A 1 D 2 B 2 B 2 C -mv_table: +mv_table is not deduplicated because different tokens: 1 A 1 A 1 D @@ -27,9 +27,9 @@ mv_table: 2 B 2 C with incorrect ussage of user deduplication token -partitioned_table: +partitioned_table is deduplicated because equal tokens: 1 A 2 B -mv_table: +mv_table is deduplicated because equal tokens: 1 A 2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql index 918b7f2553d..2eb931f7f73 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -1,6 +1,12 @@ DROP TABLE IF EXISTS partitioned_table; DROP TABLE IF EXISTS mv_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + CREATE TABLE partitioned_table (key Int64, value String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') @@ -12,11 +18,6 @@ CREATE MATERIALIZED VIEW mv_table (key Int64, value String) ORDER BY tuple() AS SELECT key, value FROM partitioned_table; -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table VALUES (1, 
'D'), (2, 'B'); @@ -26,12 +27,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with user deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); @@ -41,12 +53,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because different tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with incorrect ussage of user deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); @@ -56,8 +79,5 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is deduplicated because equal tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; - DROP TABLE partitioned_table; DROP TABLE mv_table; From 86bbcb0037302ac699230a4d180b7eb98e1b4ee6 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 6 Jun 2024 01:42:07 -0400 Subject: [PATCH 104/439] Startup scripts --- programs/server/Server.cpp | 46 +++++++++++++++++++ src/Core/ServerSettings.h | 1 + src/Interpreters/SystemLog.cpp | 6 +++ src/Interpreters/SystemLog.h | 12 ++--- .../test_startup_scripts/__init__.py | 0 .../test_startup_scripts/configs/config.xml | 14 ++++++ .../test_startup_scripts/configs/users.xml | 41 +++++++++++++++++ .../integration/test_startup_scripts/test.py | 18 ++++++++ 8 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 tests/integration/test_startup_scripts/__init__.py create mode 100644 tests/integration/test_startup_scripts/configs/config.xml create mode 100644 tests/integration/test_startup_scripts/configs/users.xml create mode 100644 tests/integration/test_startup_scripts/test.py diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 
8fcb9d87a93..9654f90a928 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -613,6 +613,49 @@ static void sanityChecks(Server & server) } } +void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) +{ + try + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys("startup_scripts", keys); + + SetResultDetailsFunc callback; + for (const auto & key : keys) + { + std::string full_prefix = "startup_scripts." + key; + + if (config.has(full_prefix + ".condition")) + { + auto condition = config.getString(full_prefix + ".condition"); + auto condition_read_buffer = ReadBufferFromString(condition); + auto condition_write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Checking startup query condition `{}`", condition); + executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + + auto result = condition_write_buffer.str(); + + if (result != "1\n" && result != "true\n") + continue; + + LOG_DEBUG(log, "Condition is true, will execute the query next"); + } + + auto query = config.getString(full_prefix + ".query"); + auto read_buffer = ReadBufferFromString(query); + auto write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Executing query `{}`", query); + executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + } + } + catch (const std::exception & e) + { + LOG_ERROR(log, "Failed to parse startup scripts file {}", e.what()); + } +} + static void initializeAzureSDKLogger( [[ maybe_unused ]] const ServerSettings & server_settings, [[ maybe_unused ]] int server_logs_level) @@ -2107,6 +2150,9 @@ try load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); + if (config().has("startup_scripts")) + loadStartupScripts(config(), global_context, log); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 45f235116ab..74bd6cdf0e5 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -146,6 +146,7 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `_log` tables before the startup. 
It can be helpful if some startup scripts depend on `_log` tables.", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 5e0ce2cb0de..952abd309ce 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -350,10 +351,15 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf if (blob_storage_log) logs.emplace_back(blob_storage_log.get()); + bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { for (auto & log : logs) + { log->startup(); + if (should_prepare) + log->prepareTable(); + } } catch (...) { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index af635ca1bdb..b7becf5030c 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -132,6 +132,12 @@ public: void stopFlushThread() override; + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. + */ + void prepareTable() override; + protected: LoggerPtr log; @@ -142,12 +148,6 @@ protected: StoragePtr getStorage() const; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - /// Some tables can override settings for internal queries virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; diff --git a/tests/integration/test_startup_scripts/__init__.py b/tests/integration/test_startup_scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_startup_scripts/configs/config.xml b/tests/integration/test_startup_scripts/configs/config.xml new file mode 100644 index 00000000000..42c1965c66d --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.xml @@ -0,0 +1,14 @@ + + + + CREATE ROLE OR REPLACE testrole + + + GRANT CREATE USER, ALTER USER, DROP USER, SHOW USERS, SHOW CREATE USER ON *.* TO 'testrole' WITH GRANT OPTION; + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + diff --git a/tests/integration/test_startup_scripts/configs/users.xml b/tests/integration/test_startup_scripts/configs/users.xml new file mode 100644 index 00000000000..f9917b034b2 --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/users.xml @@ -0,0 +1,41 @@ + + + + + + + + 1 + + + + + + + + + + ::/0 + + + default + + default + + + + + + + + 3600 + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py new file mode 100644 index 00000000000..ee61994f830 --- /dev/null +++ b/tests/integration/test_startup_scripts/test.py @@ -0,0 +1,18 @@ +from helpers.cluster import ClickHouseCluster + + +def test_startup_scripts(): + cluster = ClickHouseCluster(__file__) + + node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], + with_zookeeper=False, + ) + + try: + cluster.start() + assert node.query("SHOW TABLES") == "TestTable\n" + + finally: + 
cluster.shutdown() From d7c70d029e82666f677b3fea5007cb589204b8fe Mon Sep 17 00:00:00 2001 From: divanik Date: Thu, 6 Jun 2024 16:56:30 +0000 Subject: [PATCH 105/439] Ignore caches and encrypted --- programs/disks/DisksApp.cpp | 2 +- programs/disks/DisksClient.cpp | 4 ++-- src/Disks/DiskFactory.cpp | 8 +++++++- src/Disks/DiskFactory.h | 3 ++- src/Disks/DiskSelector.cpp | 8 ++++++-- src/Disks/DiskSelector.h | 4 +++- 6 files changed, 21 insertions(+), 8 deletions(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 4c1d98ec791..8d1c4b24fc1 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -430,7 +430,7 @@ int DisksApp::main(const std::vector & /*args*/) auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(); + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); disk_selector->initialize(config(), config_prefix, global_context, validator); std::vector>> disks_with_path; diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 1ebfaf40096..40b458fd7b3 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -8,8 +8,8 @@ namespace ErrorCodes { -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; }; namespace DB diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index de7ee5a74f4..4aa7f6ff564 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -27,7 +27,8 @@ DiskPtr DiskFactory::create( ContextPtr context, const DisksMap & map, bool attach, - bool custom_disk) const + bool custom_disk, + const std::unordered_set & skip_types) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -38,6 +39,11 @@ DiskPtr DiskFactory::create( "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type); } + if (skip_types.contains(found->first)) + { + return nullptr; + } + const auto & disk_creator = found->second; return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index d03ffa6a40f..044ce81dbae 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -42,7 +42,8 @@ public: ContextPtr context, const DisksMap & map, bool attach = false, - bool custom_disk = false) const; + bool custom_disk = false, + const std::unordered_set & skip_types = {}) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 77e2299ed65..b187b491dc0 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -53,8 +53,12 @@ void DiskSelector::initialize( if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) continue; - - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); + auto created_disk + = factory.create(disk_name, config, disk_config_prefix, context, disks, /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk.get()) + { + disks.emplace(disk_name, std::move(created_disk)); + } } if (!has_default_disk) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 6669b428158..fb3cb4a0177 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,7 @@ 
class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - DiskSelector() = default; + explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -48,6 +48,8 @@ private: bool is_initialized = false; void assertInitialized() const; + + const std::unordered_set skip_types; }; } From ccb4bd63700267169cbc30da68392ba6d8abb0d9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 6 Jun 2024 19:35:57 +0200 Subject: [PATCH 106/439] Resolve conflicts: apply implementation to StorageObjectStorage --- .../DataLakes/DeltaLakeMetadata.cpp | 311 ++++++++++++++++-- .../DataLakes/DeltaLakeMetadata.h | 12 +- .../ObjectStorage/DataLakes/HudiMetadata.h | 3 + .../DataLakes/IDataLakeMetadata.h | 3 + .../DataLakes/IStorageDataLake.h | 27 +- .../ObjectStorage/DataLakes/IcebergMetadata.h | 3 + .../ObjectStorage/StorageObjectStorage.cpp | 21 +- .../ObjectStorage/StorageObjectStorage.h | 8 + .../StorageObjectStorageSource.cpp | 31 +- .../StorageObjectStorageSource.h | 5 +- 10 files changed, 384 insertions(+), 40 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 38bf3112ee2..bd3e21f12fd 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -4,19 +4,41 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include + +#include +#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include -#include +#include +#include +#include + +namespace fs = std::filesystem; namespace DB { @@ -25,6 +47,8 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } struct DeltaLakeMetadata::Impl @@ -74,9 +98,17 @@ struct DeltaLakeMetadata::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. 
*/ - std::set processMetadataFiles() + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles() { std::set result_files; + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) @@ -90,7 +122,7 @@ struct DeltaLakeMetadata::Impl if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files); + processMetadataFile(file_path, current_schema, current_partition_columns, result_files); } LOG_TRACE( @@ -101,10 +133,10 @@ struct DeltaLakeMetadata::Impl { const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files); + processMetadataFile(key, current_schema, current_partition_columns, result_files); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -136,7 +168,11 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) const + void processMetadataFile( + const String & key, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, + std::set & result) { auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); @@ -157,20 +193,236 @@ struct DeltaLakeMetadata::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("add")) { - const auto path = json["add"]["path"].getString(); - result.insert(std::filesystem::path(configuration->getPath()) / path); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } } - else if (json.has("remove")) + else if (object->has("remove")) { - const auto path = json["remove"]["path"].getString(); - result.erase(std::filesystem::path(configuration->getPath()) / path); + auto path = object->get("remove").extract()->getValue("path"); + 
result.erase(fs::path(configuration->getPath()) / path); + } + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + NamesAndTypesList current_schema; + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } } } } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? 
makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isUInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isUInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isUInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isUInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type = getFieldType(type, "elementType", 
is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -277,8 +529,8 @@ struct DeltaLakeMetadata::Impl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); + std::shared_ptr file_schema; + THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); ArrowColumnToCHColumn column_reader( header, "Parquet", @@ -327,16 +579,13 @@ DeltaLakeMetadata::DeltaLakeMetadata( ContextPtr context_) : impl(std::make_unique(object_storage_, configuration_, context_)) { -} - -Strings DeltaLakeMetadata::getDataFiles() const -{ - if (!data_files.empty()) - return data_files; - auto result = impl->processMetadataFiles(); - data_files = Strings(result.begin(), result.end()); - return data_files; + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; + + LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index e527721b29e..da9fa1e76ce 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -20,9 +20,13 @@ public: ConfigurationPtr configuration_, ContextPtr context_); - Strings getDataFiles() const override; + Strings getDataFiles() const override { return data_files; } - NamesAndTypesList getTableSchema() const override { return {}; } + NamesAndTypesList getTableSchema() const override { return schema; } + + DataLakePartitionColumns getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override { @@ -44,6 +48,10 @@ private: struct Impl; const std::shared_ptr impl; mutable Strings data_files; + + NamesAndTypesList schema; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 3ab274b1fbf..ac978732804 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,8 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); @@ -46,6 +48,7 @@ private: const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; Strings getDataFilesImpl() const; }; diff --git 
a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index a2bd5adb947..53b8abf7a5c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -2,6 +2,7 @@ #include #include #include +#include "PartitionColumns.h" namespace DB { @@ -13,6 +14,8 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; + virtual DataLakePartitionColumns getPartitionColumns() const { return {}; } + virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 83865c47eb8..64711d1774c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -81,7 +81,7 @@ public: auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); auto schema_from_metadata = metadata->getTableSchema(); - if (schema_from_metadata != NamesAndTypesList{}) + if (!schema_from_metadata.empty()) { return ColumnsDescription(std::move(schema_from_metadata)); } @@ -99,6 +99,7 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + Storage::partition_columns = new_metadata->getPartitionColumns(); if (current_metadata && *current_metadata == *new_metadata) return; @@ -128,6 +129,30 @@ public: private: ConfigurationPtr base_configuration; DataLakeMetadataPtr current_metadata; + + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!current_metadata) + { + Storage::updateConfiguration(local_context); + current_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + } + auto column_mapping = current_metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } }; using StorageIceberg = IStorageDataLake; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 06dbd373bf9..39673a03cb1 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -82,6 +82,8 @@ public: /// Get table schema parsed from metadata. 
NamesAndTypesList getTableSchema() const override { return schema; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -104,6 +106,7 @@ private: Int32 current_schema_id; NamesAndTypesList schema; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2c8e60b49d0..14bbad659f1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -111,7 +111,8 @@ public: const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_) + size_t num_streams_, + const DataLakePartitionColumns & partition_columns_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) @@ -123,6 +124,7 @@ public: , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) + , partition_columns(partition_columns_) { } @@ -161,7 +163,7 @@ public: { auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count, partition_columns); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -190,6 +192,7 @@ private: const size_t max_block_size; size_t num_streams; const bool distributed_processing; + DataLakePartitionColumns partition_columns; void createIterator(const ActionsDAG::Node * predicate) { @@ -203,6 +206,15 @@ private: }; } +ReadFromFormatInfo StorageObjectStorage::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -222,7 +234,7 @@ void StorageObjectStorage::read( } const auto read_from_format_info = prepareReadingFromFormat( - column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -240,7 +252,8 @@ void StorageObjectStorage::read( need_only_count, local_context, max_block_size, - num_streams); + num_streams, + partition_columns); query_plan.addStep(std::move(read_step)); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index f45d8c1f01a..645f97201ec 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -117,6 +118,12 @@ public: protected: virtual void updateConfiguration(ContextPtr local_context); + virtual ReadFromFormatInfo 
prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context); + static std::unique_ptr createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, @@ -129,6 +136,7 @@ protected: const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; + mutable DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b31d0f8a92e..97c1ecc38b9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -49,7 +49,8 @@ StorageObjectStorageSource::StorageObjectStorageSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_) + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -68,6 +69,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) + , partition_columns(partition_columns_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -201,6 +203,33 @@ Chunk StorageObjectStorageSource::generate() getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), object_info.metadata->size_bytes, &filename); + if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) + { + auto partition_values = partition_columns.find(filename); + + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; + + auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + + const auto & type = name_and_type.type; + auto partition_column = type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + /// Add correct values. 
+ if (chunk.hasColumns()) + { + chunk.addColumn(column_pos, std::move(partition_column)); + } + else + { + chunk.addColumn(std::move(partition_column)); + } + } + } + return chunk; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..ab8d9588155 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -39,7 +40,8 @@ public: UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_); + bool need_only_count_, + const DataLakePartitionColumns & partition_columns_ = {}); ~StorageObjectStorageSource() override; @@ -81,6 +83,7 @@ protected: bool initialized = false; size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); + DataLakePartitionColumns partition_columns; struct ReaderHolder : private boost::noncopyable { From 3f7a9738fee0f7bd79a4161b6087bdcb19f577cc Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 7 Jun 2024 09:04:28 +0000 Subject: [PATCH 107/439] Fix compilation bug --- programs/disks/DisksApp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 8d1c4b24fc1..d50e3082e23 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -430,7 +430,7 @@ int DisksApp::main(const std::vector & /*args*/) auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); disk_selector->initialize(config(), config_prefix, global_context, validator); std::vector>> disks_with_path; From 1c1628db0f00d25c97e552e1b63d2f2a5cae9ee0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 7 Jun 2024 13:43:26 +0200 Subject: [PATCH 108/439] Minor --- .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 2 +- src/Storages/ObjectStorage/DataLakes/HudiMetadata.h | 3 +++ .../ObjectStorage/DataLakes/IDataLakeMetadata.h | 2 +- .../ObjectStorage/DataLakes/IStorageDataLake.h | 5 ++++- src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h | 3 +++ .../ObjectStorage/DataLakes/PartitionColumns.h | 2 ++ .../ObjectStorage/StorageObjectStorageSource.cpp | 10 +++------- src/TableFunctions/ITableFunctionDataLake.h | 2 -- 8 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index da9fa1e76ce..926bd1b451d 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -24,7 +24,7 @@ public: NamesAndTypesList getTableSchema() const override { return schema; } - DataLakePartitionColumns getPartitionColumns() const override { return partition_columns; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index ac978732804..b060b1b0d39 100644 --- 
a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,8 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override @@ -49,6 +51,7 @@ private: const ConfigurationPtr configuration; mutable Strings data_files; std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; Strings getDataFilesImpl() const; }; diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index 53b8abf7a5c..2954d50db91 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -14,7 +14,7 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; - virtual DataLakePartitionColumns getPartitionColumns() const { return {}; } + virtual const DataLakePartitionColumns & getPartitionColumns() const = 0; virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 64711d1774c..97fb9890490 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -99,7 +99,10 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - Storage::partition_columns = new_metadata->getPartitionColumns(); + auto partition_columns = new_metadata->getPartitionColumns(); + + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; if (current_metadata && *current_metadata == *new_metadata) return; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 39673a03cb1..9476ac6e7d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -84,6 +84,8 @@ public: const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -107,6 +109,7 @@ private: NamesAndTypesList schema; mutable Strings data_files; std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h index 604dbbf78fa..eb605559145 100644 --- a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h +++ b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h @@ -9,6 +9,8 @@ struct DataLakePartitionColumn { NameAndTypePair name_and_type; Field value; + + bool operator ==(const DataLakePartitionColumn & other) 
const = default; }; /// Data file -> partition columns diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 97c1ecc38b9..3ae51cd4235 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -212,21 +212,17 @@ Chunk StorageObjectStorageSource::generate() if (!read_from_format_info.source_header.has(name_and_type.name)) continue; - auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); - const auto & type = name_and_type.type; - auto partition_column = type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); /// This column is filled with default value now, remove it. chunk.erase(column_pos); + /// Add correct values. if (chunk.hasColumns()) - { chunk.addColumn(column_pos, std::move(partition_column)); - } else - { chunk.addColumn(std::move(partition_column)); - } } } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index f7915643a08..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -69,8 +69,6 @@ protected: /// Set default format to Parquet if it's not specified in arguments. TableFunction::parseArguments(ast_function, context); } - - ColumnsDescription structure_hint; }; struct TableFunctionIcebergName From 273571c6f519b99c556b3b443b391e5dc592a682 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 7 Jun 2024 19:05:19 +0200 Subject: [PATCH 109/439] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index b259e803f80..8ba172bf32b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -562,6 +562,9 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); + + result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::move(sink)); } From 3db3b365ea46ee1fc388a1788ce59b9426b99c71 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 15:42:13 +0200 Subject: [PATCH 110/439] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8ba172bf32b..ed44a20e397 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -567,6 +567,10 @@ Chain buildPushingToViewsChain( result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(storage_header)); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 78325c89cccbb034a3cdcaf606eb6e2919096f8b Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 10 Jun 2024 16:17:36 +0000 
Subject: [PATCH 111/439] Change help message --- programs/disks/CMakeLists.txt | 3 +- programs/disks/CommandChangeDirectory.cpp | 2 +- programs/disks/CommandHelp.cpp | 42 +++++ programs/disks/CommandMkDir.cpp | 2 +- programs/disks/DisksApp.cpp | 190 +++++++++++++--------- programs/disks/DisksApp.h | 17 +- programs/disks/ICommand.h | 1 + 7 files changed, 177 insertions(+), 80 deletions(-) create mode 100644 programs/disks/CommandHelp.cpp diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index 0f3cb601750..2bf17a352e6 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -12,7 +12,8 @@ set (CLICKHOUSE_DISKS_SOURCES CommandRead.cpp CommandRemove.cpp CommandSwitchDisk.cpp - CommandWrite.cpp) + CommandWrite.cpp + CommandHelp.cpp) if (CLICKHOUSE_CLOUD) set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 71cdae904e5..5e6a08cd3fd 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -13,7 +13,7 @@ public: explicit CommandChangeDirectory() : ICommand() { command_name = "cd"; - description = "Change directory (makes sense only in interactive mode)"; + description = "Change directory"; options_description.add_options()("path", po::value(), "the path we want to get to (mandatory, positional)")( "disk", po::value(), "A disk where the path is changed"); positional_options_description.add("path", 1); diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp new file mode 100644 index 00000000000..becdae324b3 --- /dev/null +++ b/programs/disks/CommandHelp.cpp @@ -0,0 +1,42 @@ +#include "DisksApp.h" +#include "ICommand.h" + +#include +#include + +namespace DB +{ + +class CommandHelp final : public ICommand +{ +public: + explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_) + { + command_name = "help"; + description = "Print help message about available commands (all or only required)"; + options_description.add_options()("command", po::value(), "A command to help with"); + positional_options_description.add("command", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & /*client*/) override + { + std::optional command = getValueFromCommandLineOptionsWithOptional(options, "command"); + if (command.has_value()) + { + disks_app.printCommandHelpMessage(command.value()); + } + else + { + disks_app.printAvailableCommandsHelpMessage(); + } + } + + const DisksApp & disks_app; +}; + +CommandPtr makeCommandHelp(const DisksApp & disks_app) +{ + return std::make_shared(disks_app); +} + +} diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 895602adf72..3ea6df5622d 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -12,7 +12,7 @@ public: CommandMkDir() { command_name = "mkdir"; - description = "Create a directory"; + description = "Creates a directory"; options_description.add_options()("recursive", "recursively create directories")( "path", po::value(), "the path of listing (mandatory, positional)"); positional_options_description.add("path", 1); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index d50e3082e23..296567c4b35 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -6,6 +6,7 @@ #include #include "DisksClient.h" #include "ICommand.h" +#include "ICommand_fwd.h" #include #include @@ -45,11 +46,12 @@ 
CommandPtr DisksApp::getCommandByName(const String & command) const } } -std::vector DisksApp::getEmptyCompletion(CommandPtr command_) const +std::vector DisksApp::getEmptyCompletion(String command_name) const { + auto command_ptr = command_descriptions.at(command_name); auto answer = [&]() -> std::vector { - if (multidisk_commands.contains(command_->command_name)) + if (multidisk_commands.contains(command_ptr->command_name)) { return client->getAllFilesByPatternFromAllDisks(""); } @@ -62,14 +64,49 @@ std::vector DisksApp::getEmptyCompletion(CommandPtr command_) const { answer.push_back(disk_name); } - for (const auto & option : command_->options_description.options()) + for (const auto & option : command_ptr->options_description.options()) { answer.push_back("--" + option->long_name()); } + if (command_name == "help") + { + for (const auto & [current_command_name, description] : command_descriptions) + { + answer.push_back(current_command_name); + } + } std::sort(answer.begin(), answer.end()); return answer; } +std::vector DisksApp::getCommandsToComplete(const String & command_prefix) const +{ + std::vector answer{}; + for (const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + return answer; + } + return {command_prefix}; +} + std::vector DisksApp::getCompletions(const String & prefix) const { auto arguments = po::split_unix(prefix, word_break_characters); @@ -88,35 +125,12 @@ std::vector DisksApp::getCompletions(const String & prefix) const { return {arguments.back()}; } - return getEmptyCompletion(command); + return getEmptyCompletion(command->command_name); } else if (arguments.size() == 1) { String command_prefix = arguments[0]; - std::vector answer{}; - for (const auto & [word, _] : command_descriptions) - { - if (word.starts_with(command_prefix)) - { - answer.push_back(word); - } - } - if (!answer.empty()) - { - return answer; - } - for (const auto & [word, _] : aliases) - { - if (word.starts_with(command_prefix)) - { - answer.push_back(word); - } - } - if (!answer.empty()) - { - return answer; - } - return {command_prefix}; + return getCommandsToComplete(command_prefix); } else { @@ -130,31 +144,39 @@ std::vector DisksApp::getCompletions(const String & prefix) const { return {last_token}; } - auto answer = [&]() -> std::vector + std::vector answer = {}; + if (command->command_name == "help") { - if (multidisk_commands.contains(command->command_name)) - { - return client->getAllFilesByPatternFromAllDisks(last_token); - } - else - { - return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); - } - }(); - - for (const auto & disk_name : client->getAllDiskNames()) - { - if (disk_name.starts_with(last_token)) - { - answer.push_back(disk_name); - } + return getCommandsToComplete(last_token); } - for (const auto & option : command->options_description.options()) + else { - String option_sign = "--" + option->long_name(); - if (option_sign.starts_with(last_token)) + answer = [&]() -> std::vector { - answer.push_back(option_sign); + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); + + for (const auto & disk_name : 
client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } } } if (!answer.empty()) @@ -266,6 +288,7 @@ void DisksApp::addOptions() command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif @@ -293,44 +316,64 @@ void DisksApp::processOptions() } -void DisksApp::printEntryHelpMessage() +void DisksApp::printEntryHelpMessage() const { - std::cout << "ClickHouse disk management tool\n"; + std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n"; std::cout << options_description << '\n'; } -void DisksApp::printAvailableCommandsHelpMessage() +void DisksApp::printAvailableCommandsHelpMessage() const { - std::cout << "\x1b[1;33mAvailable commands:\x1b[0m\n"; - std::vector> commands_with_aliases_and_descrtiptions{}; + std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descrtiptions{}; size_t maximal_command_length = 0; - for (const auto & [current_command, _] : command_descriptions) + for (const auto & [command_name, command_ptr] : command_descriptions) { - std::string command_string = command_descriptions[current_command]->command_name; - bool need_comma = false; - for (const auto & [alias_name, alias_command_name] : aliases) - { - if (alias_command_name == current_command) - { - if (std::exchange(need_comma, true)) - command_string += ","; - else - command_string += "("; - command_string += alias_name; - } - } - command_string += (need_comma ? 
")" : ""); + std::string command_string = getCommandLineWithAliases(command_ptr); maximal_command_length = std::max(maximal_command_length, command_string.size()); - commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions[current_command]->command_name}); + commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions.at(command_name)}); } - for (const auto & [command_with_aliases, description] : commands_with_aliases_and_descrtiptions) + for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descrtiptions) { - std::cout << "\x1b[1;32m" << command_with_aliases << "\x1b[0m" - << std::string(maximal_command_length + 2 - command_with_aliases.size(), ' ') << description << "\n"; + std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description + << "\x1b[0m \n"; + std::cout << command_ptr->options_description; + std::cout << std::endl; } } +void DisksApp::printCommandHelpMessage(CommandPtr command) const +{ + String command_name_with_aliases = getCommandLineWithAliases(command); + std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n"; + std::cout << command->options_description; +} + +void DisksApp::printCommandHelpMessage(String command_name) const +{ + printCommandHelpMessage(getCommandByName(command_name)); +} + +String DisksApp::getCommandLineWithAliases(CommandPtr command) const +{ + String command_string = command->command_name; + bool need_comma = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == command->command_name) + { + if (std::exchange(need_comma, true)) + command_string += ","; + else + command_string += "("; + command_string += alias_name; + } + } + command_string += (need_comma ? 
")" : ""); + return command_string; +} + void DisksApp::initializeHistoryFile() { String home_path; @@ -423,6 +466,7 @@ int DisksApp::main(const std::vector & /*args*/) global_context->setApplicationType(Context::ApplicationType::DISKS); String path = config().getString("path", DBMS_DEFAULT_PATH); + global_context->setPath(path); String main_disk = config().getString("disk", "default"); diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index fad597335f0..ff05a5002e4 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -38,12 +38,16 @@ public: static void parseAndCheckOptions( const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); - void printEntryHelpMessage(); - void printAvailableCommandsHelpMessage(); + void printEntryHelpMessage() const; + void printAvailableCommandsHelpMessage() const; + void printCommandHelpMessage(String command_name) const; + void printCommandHelpMessage(CommandPtr command) const; + String getCommandLineWithAliases(CommandPtr command) const; + std::vector getCompletions(const String & prefix) const; - std::vector getEmptyCompletion(CommandPtr command_) const; + std::vector getEmptyCompletion(String command_name) const; ~DisksApp() override; @@ -52,6 +56,8 @@ private: String getDefaultConfigFileName(); + std::vector getCommandsToComplete(const String & command_prefix) const; + // Fields responsible for the REPL work String history_file; LineReader::Suggest suggest; @@ -79,7 +85,10 @@ private: {"delete", "remove"}, {"ls-disks", "list-disks"}, {"ls_disks", "list-disks"}, - {"packed_io", "packed-io"}}; + {"packed_io", "packed-io"}, + {"change-dir", "cd"}, + {"change_dir", "cd"}, + {"switch_disk", "switch-disk"}}; std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index d726d50ba13..97013717784 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -121,6 +121,7 @@ DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); #ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); #endif From bdcf3a0739580c8c1e9689dfa416b2fae07feed7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 22:16:26 +0200 Subject: [PATCH 112/439] fix tidy build --- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b31e7e6a562..faf3267a759 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -189,7 +189,7 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = std::move(block_dedup_token), + .block_dedup_token = block_dedup_token, .part_counters = std::move(part_counters), }); } From 24bf946c00bc9e681ec4b26dbdae0a7a786bf355 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 19:44:49 +0200 Subject: [PATCH 113/439] rm debug printing --- src/Core/Settings.h | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 31 ----------------- src/Interpreters/SquashingTransform.cpp | 12 ------- .../DeduplicationTokenTransforms.cpp | 11 ------ .../Transforms/ExpressionTransform.cpp | 2 -- .../Transforms/SquashingChunksTransform.cpp | 18 ---------- 
src/Storages/MergeTree/MergeTreeSink.cpp | 34 ------------------- .../MergeTree/MergedBlockOutputStream.cpp | 3 -- .../MergeTree/ReplicatedMergeTreeSink.cpp | 13 ------- src/Storages/WindowView/StorageWindowView.cpp | 11 ------ 10 files changed, 136 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4128f24052b..d6779a531ae 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 758ac4ab954..64fccdbe14d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -309,9 +309,6 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); - ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -413,10 +410,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink sink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,10 +418,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink presink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -462,9 +451,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = 
getContext(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); - if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -511,11 +497,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - // pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { @@ -743,14 +724,6 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - // bool is_table_dist = false; - // if (auto * dist_storage = dynamic_cast(table.get())) - // { - // is_table_dist = true; - // // LOG_DEBUG(getLogger("InsertQuery"), - // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - // } - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +753,20 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index a539870d50c..27437d1b647 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,11 +72,6 @@ void SquashingTransform::append(Block && input_block) return; } - // LOG_DEBUG(getLogger("SquashingTransform"), - // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - // input_block.rows(), input_block.bytes(), input_block.columns(), - // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); - assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -86,13 +81,6 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - // LOG_DEBUG(getLogger("SquashingTransform"), - // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - // i, source_column->getName(), - // acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - - auto mutable_column = 
IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index dba6fc40b11..0701e958877 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -20,11 +20,6 @@ namespace ErrorCodes void RestoreChunkInfosTransform::transform(Chunk & chunk) { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -76,9 +71,6 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { - LOG_DEBUG(getLogger("TokenInfo"), - "token: {}, stage: {}, view id: {}", - getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -146,8 +138,6 @@ void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); - if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -208,7 +198,6 @@ void ResetTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 73d41828bc0..04fabc9a3c6 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 531d264a25a..75228eb5c2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,6 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "onConsume {}", chunk.getNumRows()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); @@ -36,11 +33,6 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunk.setChunkInfos(chunk.getChunkInfos()); cur_chunkinfos = {}; } - - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { @@ -90,11 +82,6 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "transform rows {}, size {}, columns {}, infos: {}/{}", - // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - // chunk.getChunkInfos().size(), 
chunk.getChunkInfos().debug()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -110,11 +97,6 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_chunk.setChunkInfos(chunk.getChunkInfos()); squashed_info = {}; } - - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index faf3267a759..1fdcd4c5b74 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -50,8 +50,6 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { - LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", - storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -68,10 +66,6 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk & chunk) { - LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", - num_blocks_processed, - chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); - if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -81,8 +75,6 @@ void MergeTreeSink::consume(Chunk & chunk) auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); - LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); - using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -106,18 +98,7 @@ void MergeTreeSink::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } for (auto & current_block : part_blocks) @@ -162,13 +143,6 @@ void MergeTreeSink::consume(Chunk & chunk) else max_insert_delayed_streams_for_parallel_write = 0; - LOG_INFO(storage.log, "consume() called for {}.{} " - "streams {} + {} -> {}, " - "max {} support_parallel_write {}", - storage.getStorageID().database_name, storage.getStorageID().getTableName(), - streams, temp_part.streams.size(), streams + temp_part.streams.size(), - max_insert_delayed_streams_for_parallel_write, support_parallel_write); - /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); @@ -211,12 +185,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); - for (auto & partition : delayed_chunk->partitions) { - LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); - ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -234,14 +204,10 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); - if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 09cdc6a78bc..c5799fab09f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -336,9 +336,6 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", - block.rows(), block.bytes(), data_part_storage->getPartDirectory()); - block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 8cb4095f1e6..cf3af59118e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -311,20 +311,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 17ecba2b4a5..d4f6621b4fc 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1415,11 +1415,6 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", - block.rows(), - chunk_infos.size(), chunk_infos.debug(), - window_view.timestamp_column_name); - window_view.throwIfWindowViewIsDisabled(local_context); while 
(window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -1464,9 +1459,6 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", - lateness_bound, window_view.is_proctime); - if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { auto filter_function = makeASTFunction( @@ -1583,9 +1575,6 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); - - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", - block_max_timestamp); } UInt32 lateness_upper_bound = 0; From 4998c5888e6723f81627a14799ae0ade7676189b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 20:57:06 +0200 Subject: [PATCH 114/439] depricate update_insert_deduplication_token_in_dependent_materialized_views --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d6779a531ae..8ab66ba2a3e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,6 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 3345f27b645838b058243a91bd69c8383f812324 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 22:38:54 +0200 Subject: [PATCH 115/439] fix typo --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8ab66ba2a3e..27201cc6cf0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. 
Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 89234371438b94706fe903b9e55a30651bed0238 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 19:36:23 +0200 Subject: [PATCH 116/439] add tests for cases from docs --- src/Core/Settings.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 +- ...08_deduplication_cases_from_docs.reference | 41 +++ .../03008_deduplication_cases_from_docs.sql | 331 ++++++++++++++++++ 5 files changed, 381 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 27201cc6cf0..1bfb5a1e18f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -624,7 +624,7 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ed44a20e397..b35b6266735 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -441,9 +442,7 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = false; - if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 1fdcd4c5b74..4e20eade589 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,5 +1,8 @@ +#include +#include #include #include +#include #include #include #include @@ -185,6 +188,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + const Settings & settings = context->getSettingsRef(); + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -204,7 +209,7 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - if (deduplication_log) + if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference new file mode 100644 index 00000000000..4893274c1cd --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference @@ -0,0 +1,41 @@ +Different materialized view insert into one underlayed table equal data. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +second attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Different insert operations generate the same data after transformation in underlied table of materialized view. 
+first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +second attempt +from dst 1 A all_1_1_0 +from dst 2 A all_2_2_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Indentical blocks in insertion with `insert_deduplication_token` +first attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +second attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +third attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +Indentical blocks in insertion +from dst 0 A all_1_1_0 +Indentical blocks after materialised view`s transformation +first attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 +second attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql new file mode 100644 index 00000000000..7927a6b1edf --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql @@ -0,0 +1,331 @@ +-- ######### +select 'Different materialized view insert into one underlayed table equal data.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; +DROP TABLE IF EXISTS mv_first; +DROP TABLE IF EXISTS mv_second; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE TABLE mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_first +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +CREATE MATERIALIZED VIEW mv_second +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_second; +DROP TABLE mv_first; +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (2, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion with `insert_deduplication_token`'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` 
Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +select 'first attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'third attempt'; + +INSERT INTO dst SELECT + 1 AS key, + 'b' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks after materialised view`s transformation'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; From 63852d9b0015b47ec93e2b7755c14bb7b002fcbd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 20:45:29 +0200 Subject: [PATCH 117/439] fix fast test 00633_materialized_view_and_too_many_parts_zookeeper --- .../00633_materialized_view_and_too_many_parts_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 1fb219108da..8f7d19028b0 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = 
ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" From dd28c052671651dd0891ad4c8a0a43f9842a6ce3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 20:20:39 +0000 Subject: [PATCH 118/439] fix some cases --- .../Passes/FunctionToSubcolumnsPass.cpp | 17 +++++ .../array/FunctionsMapMiscellaneous.cpp | 6 +- ...functions_to_subcolumns_analyzer.reference | 72 +++++++++---------- ...71_function_to_subcolumns_fuzzer.reference | 3 + .../03171_function_to_subcolumns_fuzzer.sql | 39 ++++++++++ 5 files changed, 97 insertions(+), 40 deletions(-) create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 030feac65dc..9cfd22cbef5 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -269,10 +270,24 @@ public: enterImpl(*function_node, *first_argument_node, *table_node); return; } + + if (const auto * join_node = node->as()) + { + has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + return; + } } std::unordered_set getIdentifiersToOptimize() const { + if (has_join_use_nulls) + { + /// Do not optimize if we have JOIN with setting join_use_null. + /// It may change the behaviour if subcolumn can be coverted + /// to nullable while the original column cannot. + return {}; + } + /// Do not optimize if full column is requested in other context. /// It doesn't make sense because it doesn't reduce amount of read data /// and optimized functions are not computation heavy. 
But introducing @@ -306,7 +321,9 @@ private: std::unordered_set all_key_columns; std::unordered_map identifiers_count; std::unordered_map optimized_identifiers_count; + NameSet processed_tables; + bool has_join_use_nulls = false; void enterImpl(const TableNode & table_node) { diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp index 76c1ec18171..c3586a57161 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -51,6 +51,8 @@ public: bool isVariadic() const override { return impl.isVariadic(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } @@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase struct MapToSubcolumnAdapter { - static_assert(position <= 1); + static_assert(position <= 1, "position of Map subcolumn must be 0 or 1"); static void extractNestedTypes(DataTypes & types) { @@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; }; using FunctionMapValues = FunctionMapToArrayAdapter, NameMapValues>; struct NameMapContains { static constexpr auto name = "mapContains"; }; -using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; +using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; struct NameMapFilter { static constexpr auto name = "mapFilter"; }; using FunctionMapFilter = FunctionMapToArrayAdapter, NameMapFilter>; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index e409e9ad89f..32bacfba5ea 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -7,20 +7,22 @@ QUERY id: 0 isNotNull(n) UInt8 PROJECTION LIST id: 1, nodes: 3 - FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + CONSTANT id: 2, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 3, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: n.null, result_type: UInt8, source_id: 6 + FUNCTION id: 8, function_name: not, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 3, nodes: 1 - COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 - COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 - FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 8, nodes: 1 - COLUMN id: 9, column_name: n.null, result_type: UInt8, source_id: 5 + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: n.null, result_type: UInt8, source_id: 6 JOIN TREE - TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + TABLE id: 6, alias: __table1, table_name: 
default.t_func_to_subcolumns SELECT - __table1.id IS NULL AS `isNull(id)`, + _CAST(0, \'UInt8\') AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, NOT __table1.`n.null` AS `isNotNull(n)` FROM default.t_func_to_subcolumns AS __table1 @@ -120,64 +122,58 @@ QUERY id: 0 LIST id: 1, nodes: 3 COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 - FUNCTION id: 5, function_name: isNull, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 1 - COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 6, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: n, result_type: String, source_id: 9 JOIN TREE - JOIN id: 9, strictness: ALL, kind: FULL + JOIN id: 10, strictness: ALL, kind: FULL LEFT TABLE EXPRESSION TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns RIGHT TABLE EXPRESSION - UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + UNION id: 9, alias: __table2, is_subquery: 1, union_mode: UNION_ALL QUERIES - LIST id: 10, nodes: 2 - QUERY id: 11, alias: __table3 + LIST id: 11, nodes: 2 + QUERY id: 12, alias: __table3 PROJECTION COLUMNS id UInt8 - n String PROJECTION - LIST id: 12, nodes: 2 - CONSTANT id: 13, constant_value: UInt64_1, constant_value_type: UInt8 - CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 JOIN TREE TABLE id: 15, alias: __table4, table_name: system.one QUERY id: 16, alias: __table5 PROJECTION COLUMNS id UInt8 - \'www\' String PROJECTION - LIST id: 17, nodes: 2 + LIST id: 17, nodes: 1 CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 - CONSTANT id: 19, constant_value: \'www\', constant_value_type: String JOIN TREE - TABLE id: 20, alias: __table6, table_name: system.one + TABLE id: 19, alias: __table6, table_name: system.one JOIN EXPRESSION - LIST id: 21, nodes: 1 - COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + LIST id: 20, nodes: 1 + COLUMN id: 21, column_name: id, result_type: UInt64, source_id: 10 EXPRESSION - LIST id: 23, nodes: 2 - COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 - COLUMN id: 25, column_name: id, result_type: UInt8, source_id: 8 + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 24, column_name: id, result_type: UInt8, source_id: 9 SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, - __table2.n IS NULL AS `isNull(right.n)` + _CAST(0, \'UInt8\') AS `isNull(right.n)` FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( ( - SELECT - 1 AS id, - \'qqq\' AS n + SELECT 1 AS id FROM system.one AS __table4 ) UNION ALL ( - SELECT - 3 AS id, - \'www\' AS `\'www\'` + SELECT 3 AS id FROM system.one AS __table6 ) ) AS __table2 USING (id) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference new file mode 100644 index 00000000000..be47c4ab571 --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -0,0 +1,3 @@ +1 +2 1 +3 0 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql 
b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql new file mode 100644 index 00000000000..587288bbfdf --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -0,0 +1,39 @@ +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_func_to_subcolumns_map_2; + +CREATE TABLE t_func_to_subcolumns_map_2 (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map_2 VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SELECT sum(mapContains(m, toNullable('aaa'))) FROM t_func_to_subcolumns_map_2; + +DROP TABLE t_func_to_subcolumns_map_2; + +DROP TABLE IF EXISTS t_func_to_subcolumns_join; + +CREATE TABLE t_func_to_subcolumns_join (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_join VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SET join_use_nulls = 1; + +SELECT + id, + right.n IS NULL +FROM t_func_to_subcolumns_join AS left +FULL OUTER JOIN +( + SELECT + 1 AS id, + 'qqq' AS n + UNION ALL + SELECT + 3 AS id, + 'www' +) AS right USING (id) +WHERE empty(arr); + +DROP TABLE t_func_to_subcolumns_join; From 0da1bb3f049f6bcc76dee0821c5dcc74f2dd55b2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 22:40:54 +0000 Subject: [PATCH 119/439] fix typo --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9cfd22cbef5..bc2028e1b43 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -283,7 +283,7 @@ public: if (has_join_use_nulls) { /// Do not optimize if we have JOIN with setting join_use_null. - /// It may change the behaviour if subcolumn can be coverted + /// It may change the behaviour if subcolumn can be converted /// to nullable while the original column cannot. 
return {}; } From b8992f039786c822876c075262d0a4ac9e3962df Mon Sep 17 00:00:00 2001 From: divanik Date: Thu, 13 Jun 2024 19:14:16 +0000 Subject: [PATCH 120/439] Create integration test --- programs/disks/CMakeLists.txt | 3 +- programs/disks/CommandChangeDirectory.cpp | 2 +- .../disks/CommandGetCurrentDiskAndPath.cpp | 30 + programs/disks/CommandHelp.cpp | 3 +- programs/disks/CommandRead.cpp | 1 - programs/disks/DisksApp.cpp | 32 +- programs/disks/DisksApp.h | 8 +- programs/disks/DisksClient.cpp | 8 +- programs/disks/ICommand.h | 1 + src/Disks/DiskLocal.cpp | 1 + .../configs/config.xml | 1659 +++++++++++++++++ .../configs/users.xml | 120 ++ .../test_disks_app_interactive/test.py | 321 ++++ 13 files changed, 2181 insertions(+), 8 deletions(-) create mode 100644 programs/disks/CommandGetCurrentDiskAndPath.cpp create mode 100644 tests/integration/test_disks_app_interactive/configs/config.xml create mode 100644 tests/integration/test_disks_app_interactive/configs/users.xml create mode 100644 tests/integration/test_disks_app_interactive/test.py diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index 2bf17a352e6..40f9cf3401c 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -13,7 +13,8 @@ set (CLICKHOUSE_DISKS_SOURCES CommandRemove.cpp CommandSwitchDisk.cpp CommandWrite.cpp - CommandHelp.cpp) + CommandHelp.cpp + CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 5e6a08cd3fd..5c4ce737375 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -15,7 +15,7 @@ public: command_name = "cd"; description = "Change directory"; options_description.add_options()("path", po::value(), "the path we want to get to (mandatory, positional)")( - "disk", po::value(), "A disk where the path is changed"); + "disk", po::value(), "A disk where the path is changed (without disk switching)"); positional_options_description.add("path", 1); } diff --git a/programs/disks/CommandGetCurrentDiskAndPath.cpp b/programs/disks/CommandGetCurrentDiskAndPath.cpp new file mode 100644 index 00000000000..2ad3525eb19 --- /dev/null +++ b/programs/disks/CommandGetCurrentDiskAndPath.cpp @@ -0,0 +1,30 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandGetCurrentDiskAndPath final : public ICommand +{ +public: + explicit CommandGetCurrentDiskAndPath() : ICommand() + { + command_name = "current_disk_with_path"; + description = "Prints current disk and path (which coincide with prompt)"; + } + + void executeImpl(const CommandLineOptions &, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl; + } +}; + +CommandPtr makeCommandGetCurrentDiskAndPath() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp index becdae324b3..6f7e79a352e 100644 --- a/programs/disks/CommandHelp.cpp +++ b/programs/disks/CommandHelp.cpp @@ -14,7 +14,8 @@ public: { command_name = "help"; description = "Print help message about available commands (all or only required)"; - options_description.add_options()("command", po::value(), "A command to help with"); + options_description.add_options()( + "command", 
po::value(), "A command to help with (optional, positional), if not specified, help lists all the commands"); positional_options_description.add("command", 1); } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index ea05d25fb44..9f60cca2873 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -24,7 +24,6 @@ public: { auto disk = client.getCurrentDiskWithPath(); String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); - std::cerr << path_from << std::endl; std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); auto in = disk.getDisk()->readFile(path_from); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 296567c4b35..2fe490e22ff 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -192,6 +192,10 @@ std::vector DisksApp::getCompletions(const String & prefix) const bool DisksApp::processQueryText(const String & text) { + if (text.find_first_not_of(word_break_characters) == std::string::npos) + { + return true; + } if (exit_strings.find(text) != exit_strings.end()) return false; CommandPtr command; @@ -275,7 +279,8 @@ void DisksApp::addOptions() { options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( - "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode"); + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode")( + "test-mode", "Interface in test regyme"); command_descriptions.emplace("list-disks", makeCommandListDisks()); command_descriptions.emplace("copy", makeCommandCopy()); @@ -288,6 +293,7 @@ void DisksApp::addOptions() command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); @@ -311,6 +317,8 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("test-mode")) + config().setBool("test-mode", true); if (options.count("query")) query = std::optional{options["query"].as()}; } @@ -492,7 +500,7 @@ int DisksApp::main(const std::vector & /*args*/) if (!query.has_value()) { - runInteractiveReplxx(); + runInteractive(); } else { @@ -507,6 +515,26 @@ DisksApp::~DisksApp() if (global_context) global_context->shutdown(); } + +void DisksApp::runInteractiveTestMode() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void DisksApp::runInteractive() +{ + if (config().hasOption("test-mode")) + runInteractiveTestMode(); + else + runInteractiveReplxx(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index ff05a5002e4..75d604bf63c 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -52,7 +52,9 @@ public: ~DisksApp() override; private: + void runInteractive(); 
void runInteractiveReplxx(); + void runInteractiveTestMode(); String getDefaultConfigFileName(); @@ -88,7 +90,11 @@ private: {"packed_io", "packed-io"}, {"change-dir", "cd"}, {"change_dir", "cd"}, - {"switch_disk", "switch-disk"}}; + {"switch_disk", "switch-disk"}, + {"current", "current_disk_with_path"}, + {"current_disk", "current_disk_with_path"}, + {"current_path", "current_disk_with_path"}, + {"cur", "current_disk_with_path"}}; std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 40b458fd7b3..4f808f85ab6 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -29,9 +29,15 @@ DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(di { path = String{"/"}; } + if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} at disk {} is not a directory", path, disk->getName()); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} is not a directory", + path, + normalizePathAndGetAsRelative(path), + disk->getName()); } } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index 97013717784..dac614808d0 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -121,6 +121,7 @@ DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandGetCurrentDiskAndPath(); DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); #ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index d1f0a928b1d..6cb2599b82a 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -276,6 +276,7 @@ bool DiskLocal::isFile(const String & path) const bool DiskLocal::isDirectory(const String & path) const { + // std::cerr << fs::path(disk_path) / path << std::endl; return fs::is_directory(fs::path(disk_path) / path); } diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml new file mode 100644 index 00000000000..5db40531f13 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -0,0 +1,1659 @@ + + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + https://{bucket}.s3.amazonaws.com + + + https://storage.googleapis.com/{bucket} + + + https://{bucket}.oss.aliyuncs.com + + + + + +
+ Access-Control-Allow-Origin + * +
+
+ Access-Control-Allow-Headers + origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization +
+
+ Access-Control-Allow-Methods + POST, GET, OPTIONS +
+
+ Access-Control-Max-Age + 86400 +
+
+ + + + + + 8123 + + + 9000 + + + 9004 + + + 9005 + + + + + + + + + + + + 9009 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4096 + + + 10 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + none + + + 0 + + + -1 + -1 + + + false + + + + + + + + + + none + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 0 + 2 + + + 1000 + + + 0 + + + + 10000 + + + + + + + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + 5368709120 + + + 1000 + + + 134217728 + + + 10000 + + + /home/ubuntu/work/clickdb/cache/ + + false + + + /var/lib/clickhouse/ + + + + + + + /var/lib/clickhouse/tmp/ + + + 1 + 1 + 1 + + + sha256_password + + + 12 + + + + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + + true + + + true + + + true + + + true + + + true + + + false + + + 600 + + + + default + + + SQL_ + + + + + + + + + default + + + + + + + + + true + + + false + + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/com/clickhouse/clickhouse-jdbc-bridge/ + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + + + + + + + + + + + + + + + localhost + 9000 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + + + + system + query_log
+ + toYYYYMM(event_date) + + + + + + + + 7500 + + 1048576 + + 8192 + + 524288 + + false + + + +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + + false +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + part_log
+ toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + + + system + metric_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
+ + + + system + asynchronous_metric_log
+ 7000 + 1048576 + 8192 + 524288 + false +
+ + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 + 1048576 + 8192 + 524288 + false +
+ + + + + system + crash_log
+ + + 1000 + 1024 + 1024 + 512 + true +
+ + + + + + + system + processors_profile_log
+ + toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + system + asynchronous_insert_log
+ + 7500 + 1048576 + 8192 + 524288 + false + event_date + event_date + INTERVAL 3 DAY +
+ + + + system + backup_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + s3queue_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + blob_storage_log
+ toYYYYMM(event_date) + 7500 + event_date + INTERVAL 30 DAY +
+ + + + + + + + + *_dictionary.*ml + + + true + + + true + + + *_function.*ml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + /usr/share/clickhouse/protos/ + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + false + + + + + + + + + + + + + + + + + + + + + + + + + + 1073741824 + 1024 + 1048576 + 30000000 + + + + backups + + + true + + + + + + + + + + + +
\ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/configs/users.xml b/tests/integration/test_disks_app_interactive/configs/users.xml new file mode 100644 index 00000000000..57bc6309a54 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/users.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + 1 + + + + + + + + + + + + + ::/0 + + + + default + + + default + + + 1 + + + 1 + + + + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py new file mode 100644 index 00000000000..4ecd8639104 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/test.py @@ -0,0 +1,321 @@ +from helpers.cluster import ClickHouseCluster + +import pytest + +import pathlib + +# import os + +# import grpc +# import pymysql.connections +# import psycopg2 as py_psql +# import sys +# import threading + +# from helpers.cluster import ClickHouseCluster, run_and_check +# from helpers.test_tools import assert_logs_contain_with_retry +import subprocess +import select +import io +from typing import List, Tuple, Dict, Union, Optional + +import os + + +class ClickHouseDisksException(Exception): + pass + + +class LocalDisksClient(object): + SEPARATOR = b"\a\a\a\a\n" + client: Optional["LocalDisksClient"] = None # static variable + default_disk_root_directory: str = "/var/lib/clickhouse" + + def __init__(self, bin_path: str, config_path: str, working_path: str): + self.bin_path = bin_path + self.working_path = working_path + + self.proc = subprocess.Popen( + [bin_path, "disks", "--test-mode", "--config", config_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + self.stopped = False + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + def execute_query(self, query: str, timeout: float = 5.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Disks client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + error_line = self.proc.stderr.readline() + print(error_line) + raise ClickHouseDisksException(error_line.strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. 
Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def list_disks(self) -> List[Tuple[str, str]]: + output = self.execute_query("list-disks") + return list( + sorted( + map( + lambda x: (x.split(":")[0], ":".join(x.split(":")[1:])), + output.split("\n"), + ) + ) + ) + + def current_disk_with_path(self) -> Tuple[str, str]: + output = self.execute_query("current_disk_with_path") + disk_line = output.split("\n")[0] + path_line = output.split("\n")[1] + assert disk_line.startswith("Disk: ") + assert path_line.startswith("Path: ") + return disk_line[6:], path_line[6:] + + def ls( + self, path: str, recursive: bool = False, show_hidden: bool = False + ) -> Union[List[str], Dict[str, List[str]]]: + recursive_adding = "--recursive " if recursive else "" + show_hidden_adding = "--all " if show_hidden else "" + output = self.execute_query( + f"list {path} {recursive_adding} {show_hidden_adding}" + ) + if recursive: + answer: Dict[str, List[str]] = dict() + blocks = output.split("\n\n") + for block in blocks: + directory = block.split("\n")[0][:-1] + files = block.split("\n")[1:] + answer[directory] = files + return answer + else: + return output.split("\n") + + def switch_disk(self, disk: str, directory: Optional[str] = None): + directory_addition = f"--path {directory} " if directory is not None else "" + self.execute_query(f"switch-disk {disk} {directory_addition}") + + def cd(self, directory: str, disk: Optional[str] = None): + disk_addition = f"--disk {disk} " if disk is not None else "" + self.execute_query(f"cd {directory} {disk_addition}") + + def copy( + self, + path_from, + path_to, + disk_from: Optional[str] = None, + disk_to: Optional[str] = None, + ): + disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" + disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + self.execute_query( + f"copy {path_from} {path_to} {disk_from_option} {disk_to_option}" + ) + + def move(self, path_from: str, path_to: str): + self.execute_query(f"move {path_from} {path_to}") + + def rm(self, path: str): + self.execute_query(f"rm {path}") + + def mkdir(self, path: str, recursive: bool = False): + recursive_adding = "--recursive " if recursive else "" + self.execute_query(f"mkdir {path} {recursive_adding}") + + def ln(self, path_from: str, path_to: str): + self.execute_query(f"link {path_from} {path_to}") + + def read(self, path_from: str, path_to: Optional[str] = None): + path_to_adding = f"--path-to {path_to} " if path_to is not None else "" + output = self.execute_query(f"read {path_from} {path_to_adding}") + return output + + def write( + self, path_from: str, path_to: str + ): # Writing from stdin is difficult to test (do not know how to do this in python) + path_from_adding = f"--path-from {path_from}" + self.execute_query(f"write {path_from_adding} {path_to}") + + @staticmethod + def getClient(refresh: bool): + if (LocalDisksClient.client is None) or refresh: + binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") + current_working_directory = str(pathlib.Path().resolve()) + config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" + if not os.path.exists(LocalDisksClient.default_disk_root_directory): + os.mkdir(LocalDisksClient.default_disk_root_directory) + + LocalDisksClient.client = LocalDisksClient( + binary_file, config_file, current_working_directory + ) + return LocalDisksClient.client + else: + return LocalDisksClient.client + + +def test_disks_app_interactive_list_disks(): + 
client = LocalDisksClient.getClient(True) + expected_disks_with_path = [ + ("default", "/"), + ("local", client.working_path), + ] + assert expected_disks_with_path == client.list_disks() + assert client.current_disk_with_path() == ("default", "/") + client.switch_disk("local") + assert client.current_disk_with_path() == ( + "local", + client.working_path, + ) + + +def test_disks_app_interactive_list_files_local(): + client = LocalDisksClient.getClient(True) + client.switch_disk("local") + excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) + listed_files = sorted(client.ls("test_disks_app_interactive/")) + assert excepted_listed_files == listed_files + + +def test_disks_app_interactive_list_directories_default(): + client = LocalDisksClient.getClient(True) + traversed_dir = client.ls(".", recursive=True) + client.mkdir("dir1") + client.mkdir("dir2") + client.mkdir(".dir3") + client.cd("dir1") + client.mkdir("dir11") + client.mkdir(".dir12") + client.mkdir("dir13") + client.cd("../dir2") + client.mkdir("dir21") + client.mkdir("dir22") + client.mkdir(".dir23") + client.cd("../.dir3") + client.mkdir("dir31") + client.mkdir(".dir32") + client.cd("..") + traversed_dir = client.ls(".", recursive=True) + assert traversed_dir == { + ".": ["dir1", "dir2"], + "./dir1": ["dir11", "dir13"], + "./dir2": ["dir21", "dir22"], + "./dir1/dir11": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1", "dir2"], + "./dir1": [".dir12", "dir11", "dir13"], + "./dir2": [".dir23", "dir21", "dir22"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + "./dir2/.dir23": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + client.rm("dir2") + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1"], + "./dir1": [".dir12", "dir11", "dir13"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=False) + assert traversed_dir == { + ".": ["dir1"], + "./dir1": ["dir11", "dir13"], + "./dir1/dir11": [], + "./dir1/dir13": [], + } + client.rm("dir1") + client.rm(".dir3") + assert client.ls(".", recursive=True, show_hidden=False) == {".": []} + + +def test_disks_app_interactive_cp_and_read(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = LocalDisksClient.getClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt") + assert initial_text == read_text + client.mkdir("dir1") + client.copy("a.txt", "/dir1/b.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt", path_to="dir1/b.txt") + assert "" == read_text + read_text = client.read("/dir1/b.txt") + assert read_text == initial_text + with open( + f"{LocalDisksClient.default_disk_root_directory}/dir1/b.txt", "r" + ) as file: + read_text = file.read() + assert read_text == initial_text + os.remove("a.txt") + client.rm("a.txt") + client.rm("/dir1") + + +def test_disks_app_interactive_test_move_and_write(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = LocalDisksClient.getClient(True) 
+ client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + files = client.ls(".") + assert files == ["a.txt"] + client.move("a.txt", "b.txt") + files = client.ls(".") + assert files == ["b.txt"] + read_text = client.read("/b.txt") + assert read_text == initial_text + client.write("b.txt", "c.txt") + read_text = client.read("c.txt") + assert read_text == initial_text + os.remove("a.txt") From 2824ca64e0d41c7401c72d84c4a001d680ca78fd Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 14 Jun 2024 11:06:56 +0000 Subject: [PATCH 121/439] Tests and code style --- programs/disks/DisksApp.cpp | 4 +- programs/disks/ICommand.cpp | 2 +- programs/disks/ICommand.h | 2 +- tests/integration/test_disks_app_func/test.py | 4 +- .../configs/config.xml | 1656 ----------------- .../configs/users.xml | 120 -- .../test_disks_app_interactive/test.py | 60 +- 7 files changed, 40 insertions(+), 1808 deletions(-) delete mode 100644 tests/integration/test_disks_app_interactive/configs/users.xml diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 2fe490e22ff..7f657dd32a3 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -23,8 +23,8 @@ namespace DB namespace ErrorCodes { -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; }; LineReader::Patterns DisksApp::query_extenders = {"\\"}; diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 41fa281794e..0c149a8f9df 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -7,7 +7,7 @@ namespace DB namespace ErrorCodes { -extern const int BAD_ARGUMENTS; + extern const int BAD_ARGUMENTS; } CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index dac614808d0..b1e594066af 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -29,7 +29,7 @@ using CommandLineOptions = po::variables_map; namespace ErrorCodes { -extern const int BAD_ARGUMENTS; + extern const int BAD_ARGUMENTS; } class ICommand diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index d643230d198..ac1edae4199 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -9,7 +9,9 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "disks_app_test", main_configs=["config.xml"], with_minio=True + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, ) cluster.start() diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml index 5db40531f13..bcbb107f0a2 100644 --- a/tests/integration/test_disks_app_interactive/configs/config.xml +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -1,1659 +1,3 @@ - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - - 1000M - 10 - - - - - - - - - - - - - - https://{bucket}.s3.amazonaws.com - - - https://storage.googleapis.com/{bucket} - - - https://{bucket}.oss.aliyuncs.com - - - - - -
- Access-Control-Allow-Origin - * -
-
- Access-Control-Allow-Headers - origin, x-requested-with, x-clickhouse-format, x-clickhouse-user, x-clickhouse-key, Authorization -
-
- Access-Control-Allow-Methods - POST, GET, OPTIONS -
-
- Access-Control-Max-Age - 86400 -
-
- - - - - - 8123 - - - 9000 - - - 9004 - - - 9005 - - - - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 4096 - - - 10 - - - - - false - - - /path/to/ssl_cert_file - /path/to/ssl_key_file - - - false - - - /path/to/ssl_ca_cert_file - - - none - - - 0 - - - -1 - -1 - - - false - - - - - - - - - - none - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 0 - 2 - - - 1000 - - - 0 - - - - 10000 - - - - - - - - - 0.9 - - - 4194304 - - - 0 - - - - - - 8589934592 - - - 5368709120 - - - 5368709120 - - - 1000 - - - 134217728 - - - 10000 - - - /home/ubuntu/work/clickdb/cache/ - - false - - /var/lib/clickhouse/ - - - - - - - /var/lib/clickhouse/tmp/ - - - 1 - 1 - 1 - - - sha256_password - - - 12 - - - - - - - - - /var/lib/clickhouse/user_files/ - - - - - - - - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - - - - true - - - true - - - true - - - true - - - true - - - false - - - 600 - - - - default - - - SQL_ - - - - - - - - - default - - - - - - - - - true - - - false - - ' | sed -e 's|.*>\(.*\)<.*|\1|') - wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb - apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb - clickhouse-jdbc-bridge & - - * [CentOS/RHEL] - export MVN_URL=https://repo1.maven.org/maven2/com/clickhouse/clickhouse-jdbc-bridge/ - export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') - wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm - yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm - clickhouse-jdbc-bridge & - - Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. - ]]> - - - - - - - - - - - - - - - - localhost - 9000 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - - - - - - - 7500 - - 1048576 - - 8192 - - 524288 - - false - - - -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 - 1048576 - 8192 - 524288 - - false -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 - 1048576 - 8192 - 524288 - false -
- - - - system - query_views_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 - 1048576 - 8192 - 524288 - false -
- - - - - - system - metric_log
- 7500 - 1048576 - 8192 - 524288 - 1000 - false -
- - - - system - asynchronous_metric_log
- 7000 - 1048576 - 8192 - 524288 - false -
- - - - - - engine MergeTree - partition by toYYYYMM(finish_date) - order by (finish_date, finish_time_us, trace_id) - - system - opentelemetry_span_log
- 7500 - 1048576 - 8192 - 524288 - false -
- - - - - system - crash_log
- - - 1000 - 1024 - 1024 - 512 - true -
- - - - - - - system - processors_profile_log
- - toYYYYMM(event_date) - 7500 - 1048576 - 8192 - 524288 - false -
- - - - system - asynchronous_insert_log
- - 7500 - 1048576 - 8192 - 524288 - false - event_date - event_date + INTERVAL 3 DAY -
- - - - system - backup_log
- toYYYYMM(event_date) - 7500 -
- - - - system - s3queue_log
- toYYYYMM(event_date) - 7500 -
- - - - system - blob_storage_log
- toYYYYMM(event_date) - 7500 - event_date + INTERVAL 30 DAY -
- - - - - - - - - *_dictionary.*ml - - - true - - - true - - - *_function.*ml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - /usr/share/clickhouse/protos/ - - - - - - - - - - false - - false - - - https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 - - false - - - - - - - - - - - - - - - - - - - - - - - - - - 1073741824 - 1024 - 1048576 - 30000000 - - - - backups - - - true - - - - - - - - - - -
\ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/configs/users.xml b/tests/integration/test_disks_app_interactive/configs/users.xml deleted file mode 100644 index 57bc6309a54..00000000000 --- a/tests/integration/test_disks_app_interactive/configs/users.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - 1 - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py index 4ecd8639104..79ffc3001a5 100644 --- a/tests/integration/test_disks_app_interactive/test.py +++ b/tests/integration/test_disks_app_interactive/test.py @@ -4,16 +4,6 @@ import pytest import pathlib -# import os - -# import grpc -# import pymysql.connections -# import psycopg2 as py_psql -# import sys -# import threading - -# from helpers.cluster import ClickHouseCluster, run_and_check -# from helpers.test_tools import assert_logs_contain_with_retry import subprocess import select import io @@ -26,9 +16,27 @@ class ClickHouseDisksException(Exception): pass -class LocalDisksClient(object): +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, + ) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +class DisksClient(object): SEPARATOR = b"\a\a\a\a\n" - client: Optional["LocalDisksClient"] = None # static variable + local_client: Optional["DisksClient"] = None # static variable default_disk_root_directory: str = "/var/lib/clickhouse" def __init__(self, bin_path: str, config_path: str, working_path: str): @@ -170,24 +178,24 @@ class LocalDisksClient(object): self.execute_query(f"write {path_from_adding} {path_to}") @staticmethod - def getClient(refresh: bool): - if (LocalDisksClient.client is None) or refresh: + def getLocalDisksClient(refresh: bool): + if (DisksClient.local_client is None) or refresh: binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") current_working_directory = str(pathlib.Path().resolve()) config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" - if not os.path.exists(LocalDisksClient.default_disk_root_directory): - os.mkdir(LocalDisksClient.default_disk_root_directory) + if not os.path.exists(DisksClient.default_disk_root_directory): + os.mkdir(DisksClient.default_disk_root_directory) - LocalDisksClient.client = LocalDisksClient( + DisksClient.local_client = DisksClient( binary_file, config_file, current_working_directory ) - return LocalDisksClient.client + return DisksClient.local_client else: - return LocalDisksClient.client + return DisksClient.local_client def test_disks_app_interactive_list_disks(): - client = LocalDisksClient.getClient(True) + client = DisksClient.getLocalDisksClient(True) expected_disks_with_path = [ ("default", "/"), ("local", client.working_path), @@ -202,7 +210,7 @@ def test_disks_app_interactive_list_disks(): def test_disks_app_interactive_list_files_local(): - client = LocalDisksClient.getClient(True) + client = DisksClient.getLocalDisksClient(True) client.switch_disk("local") excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) listed_files = sorted(client.ls("test_disks_app_interactive/")) @@ -210,7 +218,7 @@ def test_disks_app_interactive_list_files_local(): def 
test_disks_app_interactive_list_directories_default(): - client = LocalDisksClient.getClient(True) + client = DisksClient.getLocalDisksClient(True) traversed_dir = client.ls(".", recursive=True) client.mkdir("dir1") client.mkdir("dir2") @@ -280,7 +288,7 @@ def test_disks_app_interactive_cp_and_read(): initial_text = "File content" with open("a.txt", "w") as file: file.write(initial_text) - client = LocalDisksClient.getClient(True) + client = DisksClient.getLocalDisksClient(True) client.switch_disk("default") client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") read_text = client.read("a.txt") @@ -291,9 +299,7 @@ def test_disks_app_interactive_cp_and_read(): assert "" == read_text read_text = client.read("/dir1/b.txt") assert read_text == initial_text - with open( - f"{LocalDisksClient.default_disk_root_directory}/dir1/b.txt", "r" - ) as file: + with open(f"{DisksClient.default_disk_root_directory}/dir1/b.txt", "r") as file: read_text = file.read() assert read_text == initial_text os.remove("a.txt") @@ -305,7 +311,7 @@ def test_disks_app_interactive_test_move_and_write(): initial_text = "File content" with open("a.txt", "w") as file: file.write(initial_text) - client = LocalDisksClient.getClient(True) + client = DisksClient.getLocalDisksClient(True) client.switch_disk("default") client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") files = client.ls(".") From 4ace4006d2f1be82fe299cdda73d5561b7103110 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 14 Jun 2024 13:02:41 +0000 Subject: [PATCH 122/439] Add documentation for new features in interactive client --- .../operations/utilities/clickhouse-disks.md | 58 ++++++++++++------- programs/disks/CommandCopy.cpp | 11 ++-- programs/disks/CommandMove.cpp | 4 +- programs/disks/CommandRemove.cpp | 2 +- programs/disks/DisksApp.cpp | 2 +- 5 files changed, 48 insertions(+), 29 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 76db9e41836..a2a5035600b 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -4,35 +4,53 @@ sidebar_position: 59 sidebar_label: clickhouse-disks --- -# clickhouse-disks +# Clickhouse-disks -A utility providing filesystem-like operations for ClickHouse disks. +A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and not interactive modes. -Program-wide options: +## Program-wide options * `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. * `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. +* `--query, -q` -- single query that can be executed without launching interactive mode + +## Default Disks +After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`). 
+
+## Clickhouse-disks state
+For each disk that was added, the utility stores the current directory (as in a usual filesystem). The user can change the current directory and switch between disks.
+
+The state is reflected in the prompt "`disk_name`:`path_name`".
 
 ## Commands
 
-* `copy [--disk-from d1] [--disk-to d2] <FROM_PATH> <TO_PATH>`.
-  Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided)
-  to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided).
-* `move <FROM_PATH> <TO_PATH>`.
-  Move file or directory from `FROM_PATH` to `TO_PATH`.
-* `remove <PATH>`.
-  Remove `PATH` recursively.
-* `link <FROM_PATH> <TO_PATH>`.
-  Create a hardlink from `FROM_PATH` to `TO_PATH`.
-* `list [--recursive] <PATH>...`
-  List files at `PATH`s. Non-recursive by default.
-* `list-disks`.
+In this documentation file, all mandatory positional arguments are referred to as `<parameter>`, and named arguments are referred to as `[--parameter value]`. Every positional parameter can also be passed as a named parameter with the corresponding name.
+
+* `cd (change-dir, change_dir) [--disk disk] <path>`
+  Change directory to path `path` on disk `disk` (default value is the current disk). No disk switching happens.
+* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] <path-from> <path-to>`.
+  Recursively copy data from `path-from` at disk `disk_1` (default value is the current disk (parameter `disk` in non-interactive mode))
+  to `path-to` at disk `disk_2` (default value is the current disk (parameter `disk` in non-interactive mode)).
+* `current_disk_with_path (current, current_disk, current_path)`
+  Print current state in format:
+  `Disk: "current_disk" Path: "current path on current disk"`
+* `move (mv) <path-from> <path-to>`.
+  Move file or directory from `path-from` to `path-to` within current disk.
+* `remove (rm, delete) <path>`.
+  Remove `path` recursively on the current disk.
+* `link (ln) <path-from> <path-to>`.
+  Create a hardlink from `path-from` to `path-to` on the current disk.
+* `list (ls) [--recursive] <path>`
+  List files at `path` on the current disk. Non-recursive by default.
+* `list-disks (list_disks, ls-disks, ls_disks)`.
   List disks names.
-* `mkdir [--recursive] <PATH>`.
+* `mkdir [--recursive] <path>` on the current disk.
   Create a directory. Non-recursive by default.
-* `read: <FROM_PATH> [<TO_PATH>]`
-  Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied).
-* `write [FROM_PATH] <TO_PATH>`.
-  Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`.
+* `read (r) <path-from> [--path-to path]`
+  Read a file from `path-from` to `path` (`stdout` if not supplied).
+* `switch-disk [--path path] <disk>`
+  Switch to disk `disk` on path `path` (if `path` is not specified, the default is the previous path on disk `disk`).
+* `write (w) [--path-from path] <path-to>`.
+  Write a file from `path` (`stdin` if not supplied) to `path-to`.
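
To make the command list above concrete, here is a rough sketch of an interactive session (illustrative only: the prompt follows the documented `disk_name`:`path_name` format, but the exact prompt rendering, the starting path of the `local` disk and the file names are assumptions, not output captured from the tool):

    $ clickhouse-disks -C /etc/clickhouse-server/config.xml
    default:/> list-disks
    default:/
    local:/home/user
    default:/> mkdir dir1
    default:/> cd dir1
    default:/dir1> current_disk_with_path
    Disk: default
    Path: /dir1
    default:/dir1> copy a.txt b.txt --disk-from local --disk-to default
    default:/dir1> list .
    b.txt
    default:/dir1> switch-disk local
    local:/home/user>
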
diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index ae749f7448a..4ba8a9ecbc2 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -12,11 +12,12 @@ public: explicit CommandCopy() : ICommand() { command_name = "copy"; - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - options_description.add_options()("disk-from", po::value(), "disk from which we copy")( - "disk-to", po::value(), "disk to which we copy")( - "path-from", po::value(), "path from which we copy (mandatory, positional)")( - "path-to", po::value(), "path to which we copy (mandatory, positional)"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)"); positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 23144df3d35..d762e8023d9 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -12,14 +12,14 @@ public: command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( - "path-to", po::value(), "path to which we copy (mandatory, positional)"); + "path-to", po::value(), "path to which we copy (mandatory, positional)")s; positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - auto disk = client.getCurrentDiskWithPath(); + auto disk = getDiskWithPath(client, options, "disk"); String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index b322fb2701f..1576777a4cd 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -17,7 +17,7 @@ public: void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - auto disk = client.getCurrentDiskWithPath(); + auto disk = getDiskWithPath(client, options, "disk"); const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); disk.getDisk()->removeRecursive(path); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 7f657dd32a3..d2adf5bac7a 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -280,7 +280,7 @@ void DisksApp::addOptions() options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode")( - "test-mode", "Interface in test regyme"); + "test-mode", "Interactive interface in test regyme"); command_descriptions.emplace("list-disks", makeCommandListDisks()); 
     command_descriptions.emplace("copy", makeCommandCopy());
From 752bd0078450ad6b9255c860eec9933fb74abb8a Mon Sep 17 00:00:00 2001
From: divanik
Date: Fri, 14 Jun 2024 13:54:12 +0000
Subject: [PATCH 123/439] Corrected problems and tests

---
 docs/en/operations/utilities/clickhouse-disks.md | 3 +++
 programs/disks/CommandMove.cpp | 4 ++--
 programs/disks/CommandRemove.cpp | 2 +-
 tests/integration/test_disks_app_func/test.py | 2 +-
 tests/integration/test_disks_app_interactive/__init__.py | 0
 tests/integration/test_disks_app_interactive/test.py | 1 -
 6 files changed, 7 insertions(+), 5 deletions(-)
 create mode 100644 tests/integration/test_disks_app_interactive/__init__.py

diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md
index a2a5035600b..5363449a960 100644
--- a/docs/en/operations/utilities/clickhouse-disks.md
+++ b/docs/en/operations/utilities/clickhouse-disks.md
@@ -15,6 +15,7 @@ A utility providing filesystem-like operations for ClickHouse disks. It can work
 * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`.
 * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`.
 * `--query, -q` -- single query that can be executed without launching interactive mode
+* `--help, -h` -- print all the options and commamds with description
 
 ## Default Disks
 After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`).
@@ -36,6 +37,8 @@ In this documentation file, all mandatory positional arguments are referred to
 * `current_disk_with_path (current, current_disk, current_path)`
   Print current state in format:
   `Disk: "current_disk" Path: "current path on current disk"`
+* `help [<command>]`
+  Print help message about command `command`. If `command` is not specified, print information about all commands.
 * `move (mv) <path-from> <path-to>`.
   Move file or directory from `path-from` to `path-to` within current disk.
 * `remove (rm, delete) <path>`.
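
For completeness, the non-interactive mode and the `help` command described above could be exercised roughly like this (a sketch; the config path and directory names are illustrative assumptions, not output of the tool):

    # run a single command without entering the interactive shell
    clickhouse-disks -C /etc/clickhouse-server/config.xml --query "mkdir backups --recursive"
    clickhouse-disks -C /etc/clickhouse-server/config.xml --disk default --query "list . --recursive"
    # print help for a single command, or for all commands if none is given
    clickhouse-disks --query "help copy"
    clickhouse-disks --query "help"
    # print program-wide options and the list of commands
    clickhouse-disks --help
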
diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index d762e8023d9..23144df3d35 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -12,14 +12,14 @@ public: command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( - "path-to", po::value(), "path to which we copy (mandatory, positional)")s; + "path-to", po::value(), "path to which we copy (mandatory, positional)"); positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - auto disk = getDiskWithPath(client, options, "disk"); + auto disk = client.getCurrentDiskWithPath(); String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 1576777a4cd..b322fb2701f 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -17,7 +17,7 @@ public: void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - auto disk = getDiskWithPath(client, options, "disk"); + auto disk = client.getCurrentDiskWithPath(); const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); disk.getDisk()->removeRecursive(path); } diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index ac1edae4199..34e45a9d626 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -10,7 +10,7 @@ def started_cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "disks_app_test", - main_configs=["server_configs/config.xml"], + main_configs=["config.xml"], with_minio=True, ) diff --git a/tests/integration/test_disks_app_interactive/__init__.py b/tests/integration/test_disks_app_interactive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py index 79ffc3001a5..35bd49485e4 100644 --- a/tests/integration/test_disks_app_interactive/test.py +++ b/tests/integration/test_disks_app_interactive/test.py @@ -11,7 +11,6 @@ from typing import List, Tuple, Dict, Union, Optional import os - class ClickHouseDisksException(Exception): pass From a3cab6853e187d8dfd1894ce2be332529cffb24c Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 14 Jun 2024 14:10:00 +0000 Subject: [PATCH 124/439] python test reformat --- tests/integration/test_disks_app_interactive/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py index 35bd49485e4..79ffc3001a5 100644 --- a/tests/integration/test_disks_app_interactive/test.py +++ b/tests/integration/test_disks_app_interactive/test.py @@ -11,6 +11,7 @@ from typing import List, Tuple, Dict, Union, Optional import os + class ClickHouseDisksException(Exception): pass From 55c218b4a5cec3c50fd485b29f14a27cac75b572 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 14 Jun 2024 14:20:49 +0000 Subject: [PATCH 125/439] Fix typo --- 
docs/en/operations/utilities/clickhouse-disks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 5363449a960..aeca49c0e1e 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -15,7 +15,7 @@ A utility providing filesystem-like operations for ClickHouse disks. It can work * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. * `--query, -q` -- single query that can be executed without launching interactive mode -* `--help, -h` -- print all the options and commamds with description +* `--help, -h` -- print all the options and commands with description ## Default Disks After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`). From 0d0fe2ab00132065d3438712cfbd3b257c7c3cb4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 17 Jun 2024 13:30:51 +0000 Subject: [PATCH 126/439] Fix bad conflict resolution --- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 7 +------ src/Storages/MergeTree/MergeTreeIOSettings.h | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 522161c458e..89e0db83073 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -627,14 +627,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { ISerialization::SerializeBinaryBulkSettings serialize_settings; -<<<<<<< HEAD - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; - serialize_settings.use_compact_variant_discriminators_serialization = storage.getSettings()->use_compact_variant_discriminators_serialization; -======= serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; ->>>>>>> d9a11faf4a3c02aaac2681aa9f7ee126123040b2 + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 50ffdb8aa1f..04171656fcf 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -1,6 +1,6 @@ #pragma once #include -//#include +#include #include #include #include From a0a7f176126a8f9dc65d7ea8cb488e13feda6ccb Mon Sep 17 00:00:00 
2001 From: Sema Checherinda Date: Mon, 17 Jun 2024 16:32:16 +0200 Subject: [PATCH 127/439] add tags to the new tests --- .../03008_deduplication_insert_several_blocks_nonreplicated.sh | 1 + .../03008_deduplication_insert_several_blocks_replicated.sh | 1 + ...08_deduplication_mv_generates_several_blocks_nonreplicated.sh | 1 + ...03008_deduplication_mv_generates_several_blocks_replicated.sh | 1 + ...3008_deduplication_several_mv_into_one_table_nonreplicated.sh | 1 + .../03008_deduplication_several_mv_into_one_table_replicated.sh | 1 + 6 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh index c758e2fb3de..49eb52b47fd 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh index 45b222b1fc4..53af06d4a6f 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh index 50cf2a3bb75..7d4f5240cd1 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh index 2b094e0309e..109d1674f3a 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh index 33da54b90f1..fe3d610a758 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh index 290d1f794b2..9adee6d53d4 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 318a099d9023342a2741bfc51850a569ae6b6b46 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:12:35 +0200 Subject: [PATCH 128/439] Fix docs --- docs/en/operations/settings/merge-tree-settings.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 1dd18606af3..01f9bf5a6ed 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,14 +974,13 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting -<<<<<<< HEAD ## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} Enables compact mode for binary serialization of discriminators in Variant data type. This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. Default value: true -======= + ### optimize_row_order Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part. From 9be2ec65118a7b9d4847073ac759c042768581e8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:13:01 +0200 Subject: [PATCH 129/439] Fix docs --- docs/en/operations/settings/merge-tree-settings.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 01f9bf5a6ed..84bd3ba64a4 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1026,4 +1026,3 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%. This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values. High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting. 
->>>>>>> d9a11faf4a3c02aaac2681aa9f7ee126123040b2 From 6f841d89e73f804e4610bb60ab324d1b7b0bb805 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 17 Jun 2024 17:40:52 +0000 Subject: [PATCH 130/439] Fix tests --- programs/disks/CommandList.cpp | 11 +++---- programs/disks/DisksApp.cpp | 22 ++++++------- programs/disks/DisksApp.h | 2 ++ programs/disks/DisksClient.cpp | 1 - programs/disks/DisksClient.h | 5 ++- programs/disks/ICommand.h | 2 +- .../02802_clickhouse_disks_s3_copy.sh | 18 +++++------ ...80_s3_plain_DROP_TABLE_MergeTree.reference | 32 +++++++++---------- .../02980_s3_plain_DROP_TABLE_MergeTree.sh | 4 +-- 9 files changed, 49 insertions(+), 48 deletions(-) diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index c21941c42ca..77479b1d217 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -56,12 +56,9 @@ private: std::cout << relative_path << ":\n"; - if (!file_names.empty()) - { - for (const auto & file_name : file_names) - if (show_hidden || (!file_name.starts_with('.'))) - selected_and_sorted_file_names.push_back(file_name); - } + for (const auto & file_name : file_names) + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); for (const auto & file_name : selected_and_sorted_file_names) @@ -84,7 +81,9 @@ private: } }(); if (disk.isDirectory(path)) + { listRecursive(disk, path, show_hidden); + } } } }; diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index d2adf5bac7a..3b09feecc3b 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -49,17 +49,15 @@ CommandPtr DisksApp::getCommandByName(const String & command) const std::vector DisksApp::getEmptyCompletion(String command_name) const { auto command_ptr = command_descriptions.at(command_name); - auto answer = [&]() -> std::vector + std::vector answer{}; + if (multidisk_commands.contains(command_ptr->command_name)) { - if (multidisk_commands.contains(command_ptr->command_name)) - { - return client->getAllFilesByPatternFromAllDisks(""); - } - else - { - return client->getCurrentDiskWithPath().getAllFilesByPattern(""); - } - }(); + answer = client->getAllFilesByPatternFromAllDisks(""); + } + else + { + answer = client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } for (const auto & disk_name : client->getAllDiskNames()) { answer.push_back(disk_name); @@ -211,7 +209,7 @@ bool DisksApp::processQueryText(const String & text) int code = getCurrentExceptionCode(); if (code == ErrorCodes::LOGICAL_ERROR) { - throw err; + throw std::move(err); } else if (code == ErrorCodes::BAD_ARGUMENTS) { @@ -467,7 +465,7 @@ int DisksApp::main(const std::vector & /*args*/) registerDisks(/* global_skip_access_check= */ true); registerFormats(); - auto shared_context = Context::createShared(); + shared_context = Context::createShared(); global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 75d604bf63c..1ecd9944fb8 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -68,6 +68,8 @@ private: static String word_break_characters; // General command line arguments parsing fields + + SharedContextHolder shared_context; ContextMutablePtr global_context; ProgramOptionsDescription options_description; CommandLineOptions options; diff --git a/programs/disks/DisksClient.cpp 
b/programs/disks/DisksClient.cpp index 4f808f85ab6..e38f7ec99b8 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -14,7 +14,6 @@ namespace ErrorCodes namespace DB { - DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) { if (path_.has_value()) diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index 3320c5f7cef..ab99d2f6590 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -32,7 +32,10 @@ public: String getCurrentPath() const { return path; } - bool isDirectory(const String & any_path) const { return disk->isDirectory(getRelativeFromRoot(any_path)); } + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || disk->isDirectory(getAbsolutePath(any_path)); + } std::vector listAllFilesByPath(const String & any_path) const; diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index b1e594066af..2b409d4ade6 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -53,7 +53,7 @@ protected: { return options[name].as(); } - catch (boost::bad_any_cast) + catch (boost::bad_any_cast &) { throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); } diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 2b9e5296a05..d317b2e8a1e 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,14 +14,14 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test - clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from "$config" $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." 
} - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test" # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test.copy/test" + clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test.copy" } function run_test_copy_from_s3_to_s3(){ @@ -29,13 +29,13 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from "$config" $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --log-level test --query "copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove $CLICKHOUSE_DATABASE/test.copy/test" + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference index 531163e1d84..3135f2d01e1 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 12d08159012..e6427ab26f8 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', 
'') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive "${path:?}"" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive "$path"" | tail -n+2 # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" From 1ef9bad76fadd1aa22c047760012c7644e1394b8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Jun 2024 14:44:26 +0200 Subject: [PATCH 131/439] Custom key support for cluster_for_parallel_replicas --- src/Client/HedgedConnections.cpp | 7 +- src/Client/MultiplexedConnections.cpp | 22 +-- src/Client/MultiplexedConnections.h | 10 +- .../ClusterProxy/executeQuery.cpp | 136 ++++++++++++-- src/Interpreters/ClusterProxy/executeQuery.h | 39 +++- src/Interpreters/Context.cpp | 33 +++- src/Interpreters/Context.h | 12 +- src/Interpreters/InterpreterSelectQuery.cpp | 19 +- src/Planner/PlannerJoinTree.cpp | 119 +++++++----- src/QueryPipeline/RemoteQueryExecutor.cpp | 10 +- src/Storages/MergeTree/MergeTreeData.cpp | 17 +- src/Storages/StorageDistributed.cpp | 40 +--- src/Storages/StorageMergeTree.cpp | 50 +++-- src/Storages/StorageReplicatedMergeTree.cpp | 34 +++- .../test_parallel_replicas_custom_key/test.py | 150 ++++++++++++--- ...max_parallel_replicas_custom_key.reference | 173 ----------------- .../02535_max_parallel_replicas_custom_key.sh | 46 ----- ..._parallel_replicas_custom_key_mt.reference | 177 ++++++++++++++++++ ...535_max_parallel_replicas_custom_key_mt.sh | 54 ++++++ ...parallel_replicas_custom_key_rmt.reference | 177 ++++++++++++++++++ ...35_max_parallel_replicas_custom_key_rmt.sh | 54 ++++++ 21 files changed, 977 insertions(+), 402 deletions(-) delete mode 100644 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference delete mode 100755 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh create mode 100644 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference create mode 100755 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh create mode 100644 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference create mode 100755 tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index fb4d9a6bdcc..cd662f13ce3 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -187,15 +187,16 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); - if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) + if (offset_states.size() > 1 && enable_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } - replica.connection->sendQuery(timeouts, query, /* 
query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); + replica.connection->sendQuery( + timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); }; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..8cafad5106e 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -23,8 +24,8 @@ namespace ErrorCodes } -MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) +MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { connection.setThrottler(throttler); @@ -36,9 +37,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se } -MultiplexedConnections::MultiplexedConnections(std::shared_ptr connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) - , connection_ptr(connection_ptr_) +MultiplexedConnections::MultiplexedConnections( + std::shared_ptr connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()), connection_ptr(connection_ptr_) { connection_ptr->setThrottler(throttler); @@ -50,9 +51,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr conne } MultiplexedConnections::MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) + std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { /// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that /// `skip_unavailable_shards` was set. Then just return. @@ -150,18 +150,18 @@ void MultiplexedConnections::sendQuery( } } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) /// Use multiple replicas for parallel query processing. modified_settings.parallel_replicas_count = num_replicas; for (size_t i = 0; i < num_replicas; ++i) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) modified_settings.parallel_replica_offset = i; replica_states[i].connection->sendQuery( diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 9f7b47e0562..dec32e52d4f 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -10,7 +10,6 @@ namespace DB { - /** To retrieve data directly from multiple replicas (connections) from one shard * within a single thread. As a degenerate case, it can also work with one connection. 
* It is assumed that all functions except sendCancel are always executed in one thread. @@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections { public: /// Accepts ready connection. - MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts ready connection and keep it alive before drain - MultiplexedConnections(std::shared_ptr connection_, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::shared_ptr connection_, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts a vector of connections to replicas of one shard already taken from pool. - MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler_); void sendScalarsData(Scalars & data) override; void sendExternalTablesData(std::vector & data) override; @@ -86,6 +83,7 @@ private: /// Mark the replica as invalid. void invalidateReplica(ReplicaState & replica_state); + ContextPtr context; const Settings & settings; /// The current number of valid connections to the replicas of this shard. diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 91c0c592f28..337eb21dade 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -8,23 +8,28 @@ #include #include #include +#include #include -#include -#include #include +#include +#include +#include +#include +#include #include +#include #include #include -#include #include +#include #include +#include #include #include #include -#include +#include #include -#include -#include +#include namespace DB { @@ -172,7 +177,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, /// in case of parallel replicas custom key use round robing load balancing /// so custom key partitions will be spread over nodes in round-robin fashion - if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + if (context->canUseParallelReplicasCustomKeyForCluster(cluster) && !settings.load_balancing.changed) { new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; } @@ -180,6 +185,10 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); new_context->setClientInfo(new_client_info); + + if (context->canUseParallelReplicasCustomKeyForCluster(cluster)) + new_context->disableOffsetParallelReplicas(); + return new_context; } @@ -231,17 +240,56 @@ void executeQuery( LoggerPtr log, ContextPtr context, const SelectQueryInfo & query_info, + const ColumnsDescription & columns, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator, bool is_remote_function) { const Settings & settings = context->getSettingsRef(); + if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); + /// Return directly (with correct header) if no shard to query. 
+ if (query_info.getCluster()->getShardsInfo().empty()) + { + if (settings.allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; + } + + ClusterProxy::AdditionalShardFilterGenerator shard_filter_generator; + if (context->canUseParallelReplicasCustomKeyForCluster(*query_info.getCluster())) + { + if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context)) + { + shard_filter_generator = + [my_custom_key_ast = std::move(custom_key_ast), + column_description = columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, + query_context = context, + replica_count = query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + replica_count, + replica_num - 1, + my_custom_key_ast, + {custom_key_type, custom_key_range_lower, custom_key_range_upper}, + column_description, + query_context); + }; + } + } + const ClusterPtr & not_optimized_cluster = query_info.cluster; std::vector plans; @@ -412,14 +460,7 @@ void executeQueryWithParallelReplicas( const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas - if (settings.cluster_for_parallel_replicas.value.empty()) - { - throw Exception( - ErrorCodes::CLUSTER_DOESNT_EXIST, - "Reading in parallel from replicas is enabled but cluster to execute query is not provided. 
Please set " - "'cluster_for_parallel_replicas' setting"); - } - auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas); + auto not_optimized_cluster = context->getClusterForParallelReplicas(); auto new_context = Context::createCopy(context); @@ -542,6 +583,69 @@ void executeQueryWithParallelReplicas( executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); } +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context) +{ + ColumnsDescriptionByShardNum columns_object; + if (hasDynamicSubcolumns(columns)) + columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context); + + ClusterProxy::SelectStreamFactory select_stream_factory + = ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage); + + ClusterProxy::executeQuery( + query_plan, + header, + processed_stage, + storage_id, + /*table_func_ptr=*/nullptr, + select_stream_factory, + getLogger("executeQueryWithParallelReplicasCustomKey"), + context, + query_info, + columns, + /*sharding_key_expr=*/nullptr, + /*sharding_key_column_name=*/{}, + /*distributed_settings=*/{}, + /*is_remote_function= */ false); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context) +{ + auto header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree, context, SelectQueryOptions(processed_stage).analyze()); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context) +{ + auto header = InterpreterSelectQuery(query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + query_info.query = ClusterProxy::rewriteSelectQuery( + context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 6548edf8939..cf60fc3f168 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include namespace DB @@ -13,6 +13,11 @@ class Cluster; using ClusterPtr = std::shared_ptr; struct SelectQueryInfo; +class ColumnsDescription; +struct StorageSnapshot; + +using StorageSnapshotPtr = std::shared_ptr; + class Pipe; class QueryPlan; @@ -60,10 +65,10 @@ void executeQuery( LoggerPtr log, ContextPtr context, const SelectQueryInfo & query_info, + const ColumnsDescription & columns, 
const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator, bool is_remote_function); void executeQueryWithParallelReplicas( @@ -91,6 +96,36 @@ void executeQueryWithParallelReplicas( const PlannerContextPtr & planner_context, ContextPtr context, std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f4433cd8288..b091e73436b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5394,10 +5394,37 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } -bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +bool Context::canUseParallelReplicasCustomKey() const { - return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; + return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; +} + +bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const +{ + return canUseParallelReplicasCustomKey() && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + +bool Context::canUseOffsetParallelReplicas() const +{ + return offset_parallel_replicas_enabled && settings.max_parallel_replicas > 1 + && getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS; +} + +void Context::disableOffsetParallelReplicas() +{ + offset_parallel_replicas_enabled = false; +} + +ClusterPtr Context::getClusterForParallelReplicas() const +{ + /// check cluster for parallel replicas + if (settings.cluster_for_parallel_replicas.value.empty()) + throw Exception( + ErrorCodes::CLUSTER_DOESNT_EXIST, + "Reading in parallel from replicas is enabled but cluster to execute query is not provided. 
Please set " + "'cluster_for_parallel_replicas' setting"); + + return getCluster(settings.cluster_for_parallel_replicas); } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7c7b2e4ea64..5fec7b1c2c5 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -151,6 +151,8 @@ class AsyncLoader; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map>; +using ClusterPtr = std::shared_ptr; + class LoadTask; using LoadTaskPtr = std::shared_ptr; using LoadTaskPtrs = std::vector; @@ -436,6 +438,8 @@ protected: /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; + bool offset_parallel_replicas_enabled = true; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. @@ -1273,7 +1277,13 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; - bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; + bool canUseParallelReplicasCustomKey() const; + bool canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const; + bool canUseOffsetParallelReplicas() const; + + void disableOffsetParallelReplicas(); + + ClusterPtr getClusterForParallelReplicas() const; enum class ParallelReplicasMode : uint8_t { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8e072779b53..c7688b3471d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -566,7 +566,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (storage && context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) + if (storage && context->canUseParallelReplicasCustomKey() && !joined_tables.tablesWithColumns().empty()) { if (settings.parallel_replicas_count > 1) { @@ -587,16 +587,23 @@ InterpreterSelectQuery::InterpreterSelectQuery( else if (settings.parallel_replica_offset > 0) { throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Parallel replicas processing with custom_key has been requested " - "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " - "or it's invalid (settings `parallel_replicas_custom_key`)"); + ErrorCodes::BAD_ARGUMENTS, + "Parallel replicas processing with custom_key has been requested " + "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " + "or it's invalid (settings `parallel_replicas_custom_key`)"); } } else if (auto * distributed = dynamic_cast(storage.get()); - distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { context->setSetting("distributed_group_by_no_merge", 2); + context->setSetting("prefer_localhost_replica", Field(0)); + } + else if ( + storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree) + && context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas())) + { + context->setSetting("prefer_localhost_replica", 
Field(0)); } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6ec460b0894..1dcbd87f495 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,6 +81,7 @@ namespace ErrorCodes extern const int TOO_MANY_COLUMNS; extern const int UNSUPPORTED_METHOD; extern const int BAD_ARGUMENTS; + extern const int CLUSTER_DOESNT_EXIST; } namespace @@ -834,7 +835,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); - if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) + if (query_context->canUseParallelReplicasCustomKey()) { if (settings.parallel_replicas_count > 1) { @@ -843,9 +844,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); } } @@ -879,7 +881,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres }; /// query_plan can be empty if there is nothing to read - if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator()) + if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings)) { // (1) find read step QueryPlan::Node * node = query_plan.getRootNode(); @@ -906,54 +908,79 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } chassert(reading); - - // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read - if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + if (query_context->canUseParallelReplicasCustomKey()) { - auto result_ptr = reading->selectRangesToRead(); - - UInt64 rows_to_read = result_ptr->selected_rows; - if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read) - rows_to_read = table_expression_query_info.limit; - - if (max_block_size_limited && (max_block_size_limited < rows_to_read)) - rows_to_read = max_block_size_limited; - - const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; - LOG_TRACE( - getLogger("Planner"), - "Estimated {} rows to read. 
It is enough work for {} parallel replicas", - rows_to_read, - number_of_replicas_to_use); - - if (number_of_replicas_to_use <= 1) + auto cluster = query_context->getClusterForParallelReplicas(); + if (query_context->canUseParallelReplicasCustomKeyForCluster(*cluster) + && query_context->getClientInfo().distributed_depth == 0) { - planner_context->getMutableQueryContext()->setSetting( - "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); - LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); - } - else if (number_of_replicas_to_use < settings.max_parallel_replicas) - { - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); + auto modified_query_info = select_query_info; + modified_query_info.cluster = std::move(cluster); + from_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan_parallel_replicas, + storage->getStorageID(), + modified_query_info, + storage->getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + from_stage, + table_expression_query_info.query_tree, + query_context); + query_plan = std::move(query_plan_parallel_replicas); } } - - // (3) if parallel replicas still enabled - replace reading step - if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + else if (query_context->canUseParallelReplicasOnInitiator()) { - from_stage = QueryProcessingStage::WithMergeableState; - QueryPlan query_plan_parallel_replicas; - ClusterProxy::executeQueryWithParallelReplicas( - query_plan_parallel_replicas, - storage->getStorageID(), - from_stage, - table_expression_query_info.query_tree, - table_expression_query_info.planner_context, - query_context, - table_expression_query_info.storage_limits); - query_plan = std::move(query_plan_parallel_replicas); + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read + if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + { + auto result_ptr = reading->selectRangesToRead(); + + UInt64 rows_to_read = result_ptr->selected_rows; + if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read) + rows_to_read = table_expression_query_info.limit; + + if (max_block_size_limited && (max_block_size_limited < rows_to_read)) + rows_to_read = max_block_size_limited; + + const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; + LOG_TRACE( + getLogger("Planner"), + "Estimated {} rows to read. 
It is enough work for {} parallel replicas", + rows_to_read, + number_of_replicas_to_use); + + if (number_of_replicas_to_use <= 1) + { + planner_context->getMutableQueryContext()->setSetting( + "allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + } + else if (number_of_replicas_to_use < settings.max_parallel_replicas) + { + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + } + } + + // (3) if parallel replicas still enabled - replace reading step + if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + { + from_stage = QueryProcessingStage::WithMergeableState; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicas( + query_plan_parallel_replicas, + storage->getStorageID(), + from_stage, + table_expression_query_info.query_tree, + table_expression_query_info.planner_context, + query_context, + table_expression_query_info.storage_limits); + query_plan = std::move(query_plan_parallel_replicas); + } } } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 1686a101bde..bde8ce78f55 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -105,7 +105,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( connection_entries.emplace_back(std::move(result.entry)); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); @@ -127,7 +127,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, &connection, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -148,7 +148,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection_ptr, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -169,7 +169,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable { - auto res = std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); + auto res = std::make_unique(std::move(connections_), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -234,7 +234,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if 
(extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 89f39c65517..85e6020ff72 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -190,6 +190,7 @@ namespace ErrorCodes extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; + extern const int CLUSTER_DOESNT_EXIST; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -7075,6 +7076,20 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( /// with new analyzer, Planner make decision regarding parallel replicas usage, and so about processing stage on reading if (!query_context->getSettingsRef().allow_experimental_analyzer) { + const auto & settings = query_context->getSettingsRef(); + if (query_context->canUseParallelReplicasCustomKey()) + { + if (query_context->getClientInfo().distributed_depth > 0) + return QueryProcessingStage::FetchColumns; + + if (!settings.parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::FetchColumns; + + if (to_stage >= QueryProcessingStage::WithMergeableState + && query_context->canUseParallelReplicasCustomKeyForCluster(*query_context->getClusterForParallelReplicas())) + return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + } + if (query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; @@ -7086,7 +7101,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( return QueryProcessingStage::Enum::WithMergeableState; /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled - if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + if (settings.parallel_replicas_for_non_replicated_merge_tree) return QueryProcessingStage::Enum::WithMergeableState; } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5048ef4788e..c5721456f1a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -426,7 +426,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( query_info.cluster = cluster; - if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + if (!local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { if (nodes > 1 && settings.optimize_skip_unused_shards) { @@ -871,20 +871,6 @@ void StorageDistributed::read( remote_database, remote_table, remote_table_function_ptr); } - /// Return directly (with correct header) if no shard to query. 
- if (modified_query_info.getCluster()->getShardsInfo().empty()) - { - if (local_context->getSettingsRef().allow_experimental_analyzer) - return; - - Pipe pipe(std::make_shared(header)); - auto read_from_pipe = std::make_unique(std::move(pipe)); - read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); - query_plan.addStep(std::move(read_from_pipe)); - - return; - } - const auto & snapshot_data = assert_cast(*storage_snapshot->data); ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( @@ -893,28 +879,6 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - const auto & settings = local_context->getSettingsRef(); - - ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster())) - { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) - { - additional_shard_filter_generator = - [my_custom_key_ast = std::move(custom_key_ast), - column_description = this->getInMemoryMetadataPtr()->columns, - custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, - custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, - custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, - context = local_context, - replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context); - }; - } - } - ClusterProxy::executeQuery( query_plan, header, @@ -925,10 +889,10 @@ void StorageDistributed::read( log, local_context, modified_query_info, + getInMemoryMetadataPtr()->columns, sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator, /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. 
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9352f772ce1..95a0fe13567 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1,5 +1,6 @@ #include "StorageMergeTree.h" #include "Core/QueryProcessingStage.h" +#include "Interpreters/ClientInfo.h" #include "Storages/MergeTree/IMergeTreeDataPart.h" #include @@ -66,6 +67,7 @@ namespace ErrorCodes extern const int ABORTED; extern const int SUPPORT_IS_DISABLED; extern const int TABLE_IS_READ_ONLY; + extern const int CLUSTER_DOESNT_EXIST; } namespace ActionLocks @@ -220,24 +222,44 @@ void StorageMergeTree::read( { ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits); + return; } - else - { - const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); - if (auto plan = reader.read( - column_names, + if (local_context->canUseParallelReplicasCustomKey() && settings.parallel_replicas_for_non_replicated_merge_tree + && !settings.allow_experimental_analyzer && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), storage_snapshot, - query_info, - local_context, - max_block_size, - num_streams, - nullptr, - enable_parallel_reading)) - query_plan = std::move(*plan); + processed_stage, + query_info.query, + local_context); + return; + } } + + const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() + && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree + && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + + if (auto plan = reader.read( + column_names, + storage_snapshot, + query_info, + local_context, + max_block_size, + num_streams, + nullptr, + enable_parallel_reading)) + query_plan = std::move(*plan); } std::optional StorageMergeTree::totalRows(const Settings &) const diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db58d0081c6..f82f4b7bb30 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5460,13 +5460,39 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. 
if (settings.select_sequential_consistency) + { readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + return; + } /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here - else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + { readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); - else - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); -} + return; + } + + if (local_context->canUseParallelReplicasCustomKey() && !settings.allow_experimental_analyzer + && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + processed_stage, + query_info.query, + local_context); + return; + } + } + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( QueryPlan & query_plan, diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 07a9e2badff..cb2c002f237 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -5,7 +5,10 @@ cluster = ClickHouseCluster(__file__) nodes = [ cluster.add_instance( - f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + f"n{i}", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={"replica": f"r{i}"}, ) for i in range(1, 5) ] @@ -22,32 +25,21 @@ def start_cluster(): def create_tables(cluster): n1 = nodes[0] - n1.query("DROP TABLE IF EXISTS dist_table") - n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") - - n1.query( - f"CREATE TABLE test_table ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" - ) - n1.query( - f""" - CREATE TABLE dist_table AS test_table - Engine=Distributed( - {cluster}, - currentDatabase(), - test_table, - rand() - ) - """ - ) + n1.query("DROP TABLE IF EXISTS dist_table SYNC") + n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster} SYNC") -def insert_data(cluster, row_num): - create_tables(cluster) - n1 = nodes[0] - n1.query( - f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})" +def insert_data(table_name, row_num, all_nodes=False): + query = ( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})" ) - n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") + + if all_nodes: + for n in nodes: + n.query(query) + else: + n1 = nodes[0] + n1.query(query) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @@ -56,12 +48,36 @@ def insert_data(cluster, row_num): "cluster", ["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"], ) -def 
test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type): +def test_parallel_replicas_custom_key_distributed( + start_cluster, cluster, custom_key, filter_type +): for node in nodes: node.rotate_logs() row_num = 1000 - insert_data(cluster, row_num) + + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS dist_table ON CLUSTER {cluster} SYNC") + n1.query(f"DROP TABLE IF EXISTS test_table_for_dist ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_dist ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + n1.query( + f""" + CREATE TABLE dist_table AS test_table_for_dist + Engine=Distributed( + {cluster}, + currentDatabase(), + test_table_for_dist, + rand() + ) + """ + ) + + insert_data("dist_table", row_num) + + n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") expected_result = "" for i in range(4): @@ -72,7 +88,6 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter n1.query( "SELECT key, count() FROM dist_table GROUP BY key ORDER BY key", settings={ - "prefer_localhost_replica": 0, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, @@ -87,3 +102,84 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_mergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_mt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_mt ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_mt", row_num, all_nodes=True) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_mt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "parallel_replicas_for_non_replicated_merge_tree": 1, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_replicatedmergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_rmt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_rmt ON CLUSTER {cluster} (key UInt32, value String) Engine=ReplicatedMergeTree('/clickhouse/tables', '{{replica}}') ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_rmt", row_num, all_nodes=False) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_rmt GROUP BY key ORDER 
BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference deleted file mode 100644 index 8d0f56ba185..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ /dev/null @@ -1,173 +0,0 @@ -query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -Hello -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 
-filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh deleted file mode 100755 index dccb680be42..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-parallel, long - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -function run_with_custom_key { - echo "query='$1' with custom_key='$2'" - for prefer_localhost_replica in 0 1; do - for filter_type in 'default' 'range'; do - for max_replicas in {1..3}; do - echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" - query="$1 SETTINGS max_parallel_replicas=$max_replicas\ - , parallel_replicas_custom_key='$2'\ - , parallel_replicas_custom_key_filter_type='$filter_type'\ - , prefer_localhost_replica=$prefer_localhost_replica" - $CLICKHOUSE_CLIENT --query="$query" - done - done - done -} - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; - -run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" - -function run_count_with_custom_key { - run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" -} - -run_count_with_custom_key "y" -run_count_with_custom_key "cityHash64(y)" -run_count_with_custom_key "cityHash64(y) + 1" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference new file mode 100644 index 00000000000..1bb07f0d916 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_mt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 
+0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh new file mode 100755 index 00000000000..fad43ea9070 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_mt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_mt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key_distributed { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_distributed "y" +run_count_with_custom_key_distributed "cityHash64(y)" +run_count_with_custom_key_distributed "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) as t1 JOIN 02535_custom_key_mt USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference new file mode 100644 index 00000000000..c6526b506d3 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_rmt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 
+Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh new file mode 100755 index 00000000000..6350f5027f9 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt (x String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535', 'r1') ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_rmt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt_hash"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt_hash (x String, y UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535_hash', 'r1') ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt_hash SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) as t1 JOIN 02535_custom_key_rmt_hash USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_rmt_hash" From de747a66b8e84ba350e6ed092b8ae1bef0ac7748 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 19 Jun 2024 12:56:24 +0000 Subject: [PATCH 132/439] Fix style check --- tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh | 6 +++--- .../0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index d317b2e8a1e..1638a3ff9c3 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,7 +14,7 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from "$config" $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from 
$config $CLICKHOUSE_DATABASE/test" clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } @@ -29,9 +29,9 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from "$config" $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test --query "copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { + clickhouse-disks -C "$config" --log-level test --query "copy --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove $CLICKHOUSE_DATABASE/test.copy/test" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index e6427ab26f8..d543f7195a9 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive "${path:?}"" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive "$path"" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" From 8fee4799b6fe004f637ae64a3d9f8a8fda6b4e36 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Jun 2024 17:10:27 +0200 Subject: [PATCH 133/439] Fix style --- src/Interpreters/ClusterProxy/executeQuery.cpp | 1 - src/Planner/PlannerJoinTree.cpp | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 1 - src/Storages/StorageMergeTree.cpp | 1 - 4 files changed, 4 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 337eb21dade..58517a09554 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -38,7 +38,6 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int CLUSTER_DOESNT_EXIST; extern const int UNEXPECTED_CLUSTER; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 1dcbd87f495..2f7b9e9efaa 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,7 +81,6 @@ namespace ErrorCodes extern const int TOO_MANY_COLUMNS; extern const int UNSUPPORTED_METHOD; extern const int BAD_ARGUMENTS; - extern const int CLUSTER_DOESNT_EXIST; } namespace diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp 
index 85e6020ff72..85f436a89c6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -190,7 +190,6 @@ namespace ErrorCodes extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; - extern const int CLUSTER_DOESNT_EXIST; } static void checkSuspiciousIndices(const ASTFunction * index_function) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 95a0fe13567..05de77cae70 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -67,7 +67,6 @@ namespace ErrorCodes extern const int ABORTED; extern const int SUPPORT_IS_DISABLED; extern const int TABLE_IS_READ_ONLY; - extern const int CLUSTER_DOESNT_EXIST; } namespace ActionLocks From 6ebf8ab45b44f80a85194682f14750cbcb5885f2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Jun 2024 10:02:22 +0200 Subject: [PATCH 134/439] Fix no shards case --- .../ClusterProxy/executeQuery.cpp | 26 ++++---- src/Storages/StorageDistributed.cpp | 17 ++++- src/Storages/StorageMergeTree.cpp | 63 +++++++++---------- 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 58517a09554..47fbf35233a 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -251,19 +251,6 @@ void executeQuery( if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); - /// Return directly (with correct header) if no shard to query. - if (query_info.getCluster()->getShardsInfo().empty()) - { - if (settings.allow_experimental_analyzer) - return; - - Pipe pipe(std::make_shared(header)); - auto read_from_pipe = std::make_unique(std::move(pipe)); - read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); - query_plan.addStep(std::move(read_from_pipe)); - return; - } - ClusterProxy::AdditionalShardFilterGenerator shard_filter_generator; if (context->canUseParallelReplicasCustomKeyForCluster(*query_info.getCluster())) { @@ -592,6 +579,19 @@ void executeQueryWithParallelReplicasCustomKey( const Block & header, ContextPtr context) { + /// Return directly (with correct header) if no shard to query. 
+ if (query_info.getCluster()->getShardsInfo().empty()) + { + if (context->getSettingsRef().allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; + } + ColumnsDescriptionByShardNum columns_object; if (hasDynamicSubcolumns(columns)) columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index ea087230a8e..1dd42d79d88 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -864,13 +864,28 @@ void StorageDistributed::read( header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - if (!local_context->getSettingsRef().allow_experimental_analyzer) + const auto & settings = local_context->getSettingsRef(); + if (!settings.allow_experimental_analyzer) { modified_query_info.query = ClusterProxy::rewriteSelectQuery( local_context, modified_query_info.query, remote_database, remote_table, remote_table_function_ptr); } + /// Return directly (with correct header) if no shard to query. + if (modified_query_info.getCluster()->getShardsInfo().empty()) + { + if (settings.allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + + return; + } + const auto & snapshot_data = assert_cast(*storage_snapshot->data); ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 05de77cae70..868c70499bc 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1,52 +1,51 @@ -#include "StorageMergeTree.h" -#include "Core/QueryProcessingStage.h" -#include "Interpreters/ClientInfo.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include #include #include -#include -#include #include +#include #include -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include #include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB From 6cce73bd99d1091a8bfd978e55a7e6d956836bc1 Mon Sep 17 00:00:00 2001 From: divanik Date: Thu, 20 Jun 2024 15:22:38 +0000 Subject: [PATCH 135/439] Fixed some tests --- src/Disks/DiskSelector.cpp | 3 ++ tests/integration/test_store_cleanup/test.py | 5 +++- ...n_DROP_TABLE_ReplicatedMergeTree.reference | 28 +++++++++---------- ...s3_plain_DROP_TABLE_ReplicatedMergeTree.sh | 4 +-- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 
b187b491dc0..c29a24132c4 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -86,6 +86,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( std::shared_ptr result = std::make_shared(*this); constexpr auto default_disk_name = "default"; + constexpr auto local_disk_name = "local"; DisksMap old_disks_minus_new_disks(result->getDisksMap()); for (const auto & disk_name : keys) @@ -109,6 +110,8 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); + old_disks_minus_new_disks.erase(local_disk_name); + if (!old_disks_minus_new_disks.empty()) { diff --git a/tests/integration/test_store_cleanup/test.py b/tests/integration/test_store_cleanup/test.py index 6c5a20a758a..aebfde694b3 100644 --- a/tests/integration/test_store_cleanup/test.py +++ b/tests/integration/test_store_cleanup/test.py @@ -1,3 +1,4 @@ +from time import sleep import pytest from helpers.cluster import ClickHouseCluster @@ -153,7 +154,9 @@ def test_store_cleanup(started_cluster): "directories from store", timeout=90, look_behind_lines=1000000 ) node1.wait_for_log_line( - "Nothing to clean up from store/", timeout=90, look_behind_lines=1000000 + "Nothing to clean up from store/ on disk default", + timeout=90, + look_behind_lines=1000000, ) store = node1.exec_in_container(["ls", f"{path_to_data}/store"]) diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 1e191b719a5..21b38a94cee 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -4,27 +4,27 @@ Files before DETACH TABLE all_X_X_X backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_X_X_X backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index b079e67a000..2500529186e 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,14 +55,14 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk -query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 
's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" From 90faa7b1eced87e0f2979e792d6569d1e2e005e8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 19 Jun 2024 21:44:37 +0100 Subject: [PATCH 136/439] impl --- src/Common/CgroupsMemoryUsageObserver.cpp | 178 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 14 +- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..20db6a64a31 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,3 +1,5 @@ +#include +#include #include #if defined(OS_LINUX) @@ -28,8 +30,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_OPEN_FILE; extern const int FILE_DOESNT_EXIST; extern const int INCORRECT_DATA; } @@ -107,6 +107,75 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo namespace { +/// Format is +/// kernel 5 +/// rss 15 +/// [...] +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t mem_usage = 0; + readIntText(mem_usage, buf); + return mem_usage; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); +} + +struct CgroupsV1Reader : ICgroupsReader +{ + CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") + { + } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + current_buf.rewind(); + stat_buf.rewind(); + + uint64_t mem_usage = 0; + /// memory.current contains a single number + readIntText(mem_usage, current_buf); + mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + return mem_usage; + } + +private: + std::mutex mutex; + ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + /// Caveats: /// - All of the logic in this file assumes that the current process is the only process in the /// containing cgroup (or more precisely: the only process with significant memory consumption). @@ -117,7 +186,7 @@ namespace /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such /// systems existed only for a short transition period. -std::optional getCgroupsV2FileName() +std::optional getCgroupsV2Path() { if (!cgroupsV2Enabled()) return {}; @@ -132,29 +201,30 @@ std::optional getCgroupsV2FileName() /// level, try again at the parent level as memory settings are inherited. 
while (current_cgroup != default_cgroups_mount.parent_path()) { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; current_cgroup = current_cgroup.parent_path(); } return {}; } -std::optional getCgroupsV1FileName() +std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; if (!std::filesystem::exists(path)) return {}; - return {path}; + return {default_cgroups_mount / "memory"}; } -std::pair getCgroupsFileName() +std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2FileName(); + auto v2_file_name = getCgroupsV2Path(); if (v2_file_name.has_value()) return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1FileName(); + auto v1_file_name = getCgroupsV1Path(); if (v1_file_name.has_value()) return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; @@ -166,87 +236,25 @@ std::pair getCgroupsFil CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) : log(log_) { - std::tie(file_name, version) = getCgroupsFileName(); + const auto [cgroup_path, version] = getCgroupsPath(); - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? "v1" : "v2"); + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {})", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2"); } uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const { - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] 
- std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - + chassert(cgroup_reader); + const auto mem_usage = cgroup_reader->readMemoryUsage(); LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; } diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..62bbabb9e86 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -66,14 +74,10 @@ private: { public: explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); uint64_t readMemoryUsage() const; private: LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; + std::unique_ptr cgroup_reader; }; MemoryUsageFile memory_usage_file; From 1fa5212836219f89fe2ea8877d882daf0a928bce Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 17:49:51 +0100 Subject: [PATCH 137/439] remove MemoryUsageFile --- src/Common/CgroupsMemoryUsageObserver.cpp | 153 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 15 +-- 2 files changed, 75 insertions(+), 93 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 20db6a64a31..23bfec4322b 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -24,84 +24,17 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; +extern const int FILE_DOESNT_EXIST; +extern const int INCORRECT_DATA; } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")) - , wait_time(wait_time_) - , memory_usage_file(log) -{ - LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count()); -} - -CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() -{ - stopThread(); -} - -void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) -{ - std::lock_guard limit_lock(limit_mutex); - - if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) - return; - - hard_limit = hard_limit_; - soft_limit = soft_limit_; - - on_hard_limit = [this, hard_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); - - /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are - /// really zero. Trying to avoid OOM ... 
- MemoryTracker::setRSS(hard_limit_, 0); - } - else - { - LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); - } - }; - - on_soft_limit = [this, soft_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); - -#if USE_JEMALLOC - LOG_INFO(log, "Purging jemalloc arenas"); - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif - /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); - MemoryTracker::setRSS(memory_usage, 0); - - LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); - } - else - { - LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); - } - }; - - LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); -} - -void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) -{ - std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); - on_memory_amount_available_changed = on_memory_amount_available_changed_; } namespace @@ -233,8 +166,11 @@ std::pair getCgroupsPat } -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) { const auto [cgroup_path, version] = getCgroupsPath(); @@ -245,17 +181,73 @@ CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) LOG_INFO( log, - "Will read the current memory usage from '{}' (cgroups version: {})", + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", cgroup_path, - (version == CgroupsVersion::V1) ? "v1" : "v2"); + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const +CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() { - chassert(cgroup_reader); - const auto mem_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; + stopThread(); +} + +void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) +{ + std::lock_guard limit_lock(limit_mutex); + + if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) + return; + + hard_limit = hard_limit_; + soft_limit = soft_limit_; + + on_hard_limit = [this, hard_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); + + /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are + /// really zero. Trying to avoid OOM ... + MemoryTracker::setRSS(hard_limit_, 0); + } + else + { + LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); + } + }; + + on_soft_limit = [this, soft_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); + +# if USE_JEMALLOC + LOG_INFO(log, "Purging jemalloc arenas"); + mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); +# endif + /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + MemoryTracker::setRSS(memory_usage, 0); + + LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); + } + else + { + LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); + } + }; + + LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); +} + +void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) +{ + std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); + on_memory_amount_available_changed = on_memory_amount_available_changed_; } void CgroupsMemoryUsageObserver::startThread() @@ -309,7 +301,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 62bbabb9e86..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -69,23 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. 
- struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - std::unique_ptr cgroup_reader; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; From 0f0e1cee63a8e2047f2092d7db4e81c5a3b53572 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 20:13:59 +0100 Subject: [PATCH 138/439] fix tidy --- src/Common/CgroupsMemoryUsageObserver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 23bfec4322b..c37e3c74db9 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,5 @@ +#include #include -#include #include #if defined(OS_LINUX) @@ -69,7 +69,7 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke struct CgroupsV1Reader : ICgroupsReader { - CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } uint64_t readMemoryUsage() override { @@ -85,7 +85,7 @@ private: struct CgroupsV2Reader : ICgroupsReader { - CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") { } From 3786ca72d0f1e0d8ae4d84afc31e8fe061698ed0 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 21 Jun 2024 02:02:30 -0400 Subject: [PATCH 139/439] docs, review fixes --- docs/en/operations/startup-scripts.md | 31 +++++++++++++++++++ programs/server/Server.cpp | 14 +++++++-- src/Core/ServerSettings.h | 2 +- src/Interpreters/Context.cpp | 6 ++++ src/Interpreters/Context.h | 1 + .../test_startup_scripts/configs/config.xml | 10 ++++++ 6 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 docs/en/operations/startup-scripts.md diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md new file mode 100644 index 00000000000..c7842c1472b --- /dev/null +++ b/docs/en/operations/startup-scripts.md @@ -0,0 +1,31 @@ +--- +slug: /en/operations/startup-scripts.md +sidebar_position: 70 +sidebar_label: Startup Scripts +--- + +# Startup Scripts + +ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation. + +```xml + + + + CREATE ROLE OR REPLACE test_role + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + +``` + +ClickHouse executes all queries from the `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries won't be interrupted. + +You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`. + +:::note +If the condition query returns any other value than `1` or `true`, the result will be interpreted as `false`, and the corresponding won't be executed. 
+::: diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 11113ce0c0f..02cad419fff 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -638,7 +638,12 @@ void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, Contex auto result = condition_write_buffer.str(); if (result != "1\n" && result != "true\n") + { + if (result != "0\n" && result != "false\n") + context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result)); + continue; + } LOG_DEBUG(log, "Condition is true, will execute the query next"); } @@ -651,9 +656,9 @@ void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, Contex executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); } } - catch (const std::exception & e) + catch (...) { - LOG_ERROR(log, "Failed to parse startup scripts file {}", e.what()); + tryLogCurrentException(log, "Failed to parse startup scripts file"); } } @@ -2014,6 +2019,11 @@ try /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); + + /// Startup scripts can depend on the system log tables. + if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed) + global_context->setServerSetting("prepare_system_log_tables_on_startup", true); + /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index cf220457c51..8ce3bb5394b 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -152,7 +152,7 @@ namespace DB M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ - M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `_log` tables before the startup. It can be helpful if some startup scripts depend on `_log` tables.", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. 
It can be helpful if some startup scripts depend on these tables.", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2807807b294..367b5c32e4f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2232,6 +2232,12 @@ void Context::setSetting(std::string_view name, const Field & value) contextSanityClampSettingsWithLock(*this, settings, lock); } +void Context::setServerSetting(std::string_view name, const Field & value) +{ + std::lock_guard lock(mutex); + shared->server_settings.set(name, value); +} + void Context::applySettingChange(const SettingChange & change) { try diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b3ade94ccdc..9f2c600e9fb 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -825,6 +825,7 @@ public: /// Set settings by name. void setSetting(std::string_view name, const String & value); void setSetting(std::string_view name, const Field & value); + void setServerSetting(std::string_view name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); diff --git a/tests/integration/test_startup_scripts/configs/config.xml b/tests/integration/test_startup_scripts/configs/config.xml index 42c1965c66d..98cce305a25 100644 --- a/tests/integration/test_startup_scripts/configs/config.xml +++ b/tests/integration/test_startup_scripts/configs/config.xml @@ -10,5 +10,15 @@ CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog SELECT 1; + + SELECT * FROM system.query_log LIMIT 1 + + + + system + query_log
+ ENGINE = MergeTree ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 + 7500 +
From b83941c42759e417611c77c19c102b17db7f067b Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 11:37:14 +0000 Subject: [PATCH 140/439] Force push to sync From b936d77eb159e1532cb15c6f40befcc15dacdf2a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 14:17:13 +0000 Subject: [PATCH 141/439] Try to bump sync From 9f3024cfd610d34ed7262dd6d019300ca314dd52 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 21 Jun 2024 12:10:49 -0400 Subject: [PATCH 142/439] fix tests --- .../test_startup_scripts/configs/config.d/query_log.xml | 8 ++++++++ .../configs/{config.xml => config.d/startup_scripts.xml} | 7 ------- tests/integration/test_startup_scripts/test.py | 5 ++++- 3 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 tests/integration/test_startup_scripts/configs/config.d/query_log.xml rename tests/integration/test_startup_scripts/configs/{config.xml => config.d/startup_scripts.xml} (67%) diff --git a/tests/integration/test_startup_scripts/configs/config.d/query_log.xml b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml new file mode 100644 index 00000000000..24d66fc674e --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml @@ -0,0 +1,8 @@ + + + system + query_log
+ toYYYYMM(event_date) + 1000 +
+
diff --git a/tests/integration/test_startup_scripts/configs/config.xml b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml similarity index 67% rename from tests/integration/test_startup_scripts/configs/config.xml rename to tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml index 98cce305a25..e8a711a926a 100644 --- a/tests/integration/test_startup_scripts/configs/config.xml +++ b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml @@ -14,11 +14,4 @@ SELECT * FROM system.query_log LIMIT 1 - - - system - query_log
- ENGINE = MergeTree ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 - 7500 -
diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py index ee61994f830..43a871a6fc5 100644 --- a/tests/integration/test_startup_scripts/test.py +++ b/tests/integration/test_startup_scripts/test.py @@ -6,7 +6,10 @@ def test_startup_scripts(): node = cluster.add_instance( "node", - main_configs=["configs/config.xml"], + main_configs=[ + "configs/config.d/query_log.xml", + "configs/config.d/startup_scripts.xml", + ], with_zookeeper=False, ) From 504cc3b0901091fd64f83f652478f078f1c79c50 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 21 Jun 2024 16:27:21 +0000 Subject: [PATCH 143/439] Fix some tests --- programs/disks/DisksApp.cpp | 2 +- src/Disks/DiskLocal.cpp | 1 - src/Disks/DiskSelector.cpp | 17 ++++++++++++----- src/Disks/DiskSelector.h | 6 +++++- ...0_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh | 4 +++- 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 3b09feecc3b..9ef051a2ece 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -480,7 +480,7 @@ int DisksApp::main(const std::vector & /*args*/) auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}, /*create_local=*/true); disk_selector->initialize(config(), config_prefix, global_context, validator); std::vector>> disks_with_path; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 6cb2599b82a..d1f0a928b1d 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -276,7 +276,6 @@ bool DiskLocal::isFile(const String & path) const bool DiskLocal::isDirectory(const String & path) const { - // std::cerr << fs::path(disk_path) / path << std::endl; return fs::is_directory(fs::path(disk_path) / path); } diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index c29a24132c4..6e57bdad77e 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -26,7 +26,7 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize( +void DiskSelector::( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; @@ -66,7 +66,7 @@ void DiskSelector::initialize( default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } - if (!has_local_disk) + if (!has_local_disk && create_local) disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); is_initialized = true; @@ -97,7 +97,12 @@ DiskSelectorPtr DiskSelector::updateFromConfig( auto disk_config_prefix = config_prefix + "." 
+ disk_name; if (!result->getDisksMap().contains(disk_name)) { - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); + auto created_disk = factory.create( + disk_name, config, disk_config_prefix, context, result->getDisksMap(), /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk) + { + result->addToDiskMap(disk_name, created_disk); + } } else { @@ -110,8 +115,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); - old_disks_minus_new_disks.erase(local_disk_name); - + if (create_local) + { + old_disks_minus_new_disks.erase(local_disk_name); + } if (!old_disks_minus_new_disks.empty()) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index fb3cb4a0177..8ceb4a58c15 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,10 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } + explicit DiskSelector(std::unordered_set skip_types_ = {}, bool create_local_ = false) + : skip_types(skip_types_), create_local(create_local_) + { + } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -50,6 +53,7 @@ private: void assertInitialized() const; const std::unordered_set skip_types; + const bool create_local; }; } diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index 2500529186e..7725688d225 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,13 +55,15 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -clickhouse-disks -C "$config" --disk s3_plain_disk -query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +echo "Path $path" +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" +echo "Path $path" clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left From df8341c447ecf1775e27dcdd6ea09829d9e35880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 21 Jun 2024 18:30:45 +0200 Subject: [PATCH 144/439] Try to improve low number itoa --- base/base/itoa.cpp | 75 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index e7250764704..0997daebbf6 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -3,6 +3,34 @@ #include #include +namespace +{ +ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value) +{ + *p = '0' + value; + return p + 1; +} + +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + "10111213141516171819" + 
"20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) +{ + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; +} + namespace jeaiii { /* @@ -84,43 +112,48 @@ template inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) { constexpr auto q = sizeof(T); - using U = cond>>; + using U = cond>>; // convert bool to int before test with unary + to silence warning if T happens to be bool U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); - if (n < UInt32(1e2)) + if (n < U(1e2)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? b + 1 : b + 2; + return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) { - if (n < UInt32(1e4)) + if (sizeof(U) == 1 || n < U(1e4)) { auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < UInt32(1e3); + if constexpr (sizeof(U) == 1) + b -= 1; + else + b -= n < U(1e3); auto f2 = (f0 & mask24) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; return b + 4; } auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e5); + if constexpr (sizeof(U) == 2) + b -= 1; + else + b -= n < U(1e5); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; return b + 6; } - if (n < UInt64(1ull << 32ull)) + if (sizeof(U) == 4 || n < UInt64(1ull << 32ull)) { - if (n < UInt32(1e8)) + if (n < U(1e8)) { auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e7); + b -= n < U(1e7); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; @@ -248,28 +281,6 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) } } -namespace -{ -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; const int max_multiple_of_hundred_blocks = 9; static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0); From 7e0ed1b02cb55b0ce5788c9f1cf7c69c163ad14b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Jun 2024 21:29:46 +0100 Subject: [PATCH 145/439] add test --- src/Common/CgroupsMemoryUsageObserver.cpp | 4 +-- .../test_memory_limit_observer/test.py | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index c37e3c74db9..33393a8b9c6 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -230,7 +230,7 @@ void 
CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint # endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -302,7 +302,7 @@ void CgroupsMemoryUsageObserver::runThread() if (soft_limit > 0 && hard_limit > 0) { uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index fe3acd9a0cf..369b9241f07 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -35,7 +35,7 @@ def get_latest_mem_limit(): ).strip() ) return mem_limit - except Exception as e: + except Exception: time.sleep(1) raise Exception("Cannot get memory limit") @@ -51,3 +51,26 @@ def test_observe_memory_limit(started_cluster): if new_max_mem > original_max_mem: return raise Exception("the memory limit does not increase as expected") + + +def test_memory_usage_doesnt_include_page_cache_size(started_cluster): + # populate page cache with 10GB of data + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + + observer_refresh_period = int( + node1.query( + "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'" + ).strip() + ) + time.sleep(observer_refresh_period + 1) + + max_mem_usage_from_cgroup = node1.query( + """ + SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem + FROM system.text_log + WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' + """ + ).strip() + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From 750c902671bb64f02c7bf6918779561a06711de6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 21 Jun 2024 20:44:53 +0000 Subject: [PATCH 146/439] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 369b9241f07..d8c2a0e8ad7 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -73,4 +73,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From b013d95fd5f9c5beacac132ecaf88a26d63d590c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 18:17:32 +0200 Subject: [PATCH 147/439] Provide keeper override for local development (rewrite path to current 
directory) The same way like clickhouse-server has Signed-off-by: Azat Khuzhin --- programs/keeper/conf.d/local.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 programs/keeper/conf.d/local.yaml diff --git a/programs/keeper/conf.d/local.yaml b/programs/keeper/conf.d/local.yaml new file mode 100644 index 00000000000..722e90e374a --- /dev/null +++ b/programs/keeper/conf.d/local.yaml @@ -0,0 +1,9 @@ +logger: + log: + "@remove": remove + errorlog: + "@remove": remove + console: 1 +keeper_server: + log_storage_path: ./logs + snapshot_storage_path: ./snapshots From 6dc68983e787a7d91d814148514937ed29887279 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 18:17:02 +0200 Subject: [PATCH 148/439] Fix keeper with non-system-wide directories Otherwise it still tries to access default system-wide directory on config reloading: 2024.06.22 20:36:19.860615 [ 31600 ] {} Application: std::exception. Code: 1001, type: std::__1::__fs::filesystem::filesystem_error, e.what() = filesystem error: in create_directories: Permission denied ["/var/lib/clickhouse-keeper"], Stack trace (when copying this message, always include the lines below): 0. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/exception:141: std::runtime_error::runtime_error(String const&) @ 0x0000000016f16a17 1. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/string:1499: std::system_error::system_error(std::error_code, String const&) @ 0x0000000016f1d09f 2. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__filesystem/filesystem_error.h:42: std::__fs::filesystem::filesystem_error::filesystem_error[abi:v15000](String const&, std::__fs::filesystem::path const&, std::error_code) @ 0x000000000b639ed2 3. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__filesystem/filesystem_error.h:90: void std::__fs::filesystem::__throw_filesystem_error[abi:v15000](String&, std::__fs::filesystem::path const&, std::error_code const&) @ 0x0000000016ebaf96 4. /src/ch/clickhouse/contrib/llvm-project/libcxx/src/filesystem/filesystem_common.h:173: std::__fs::filesystem::detail::(anonymous namespace)::ErrorHandler::report(std::error_code const&) const @ 0x0000000016ebe416 5. /src/ch/clickhouse/contrib/llvm-project/libcxx/src/filesystem/operations.cpp:1030: std::__fs::filesystem::__create_directories(std::__fs::filesystem::path const&, std::error_code*) @ 0x0000000016ebec3d 6. /src/ch/clickhouse/contrib/llvm-project/libcxx/src/filesystem/filesystem_common.h:161: std::__fs::filesystem::__create_directories(std::__fs::filesystem::path const&, std::error_code*) @ 0x0000000016ebed0e 7. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/string:1499: DB::ConfigProcessor::savePreprocessedConfig(DB::ConfigProcessor::LoadedConfig&, String) @ 0x00000000128362b3 8. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/string:1499: DB::ConfigReloader::reloadIfNewer(bool, bool, bool, bool) @ 0x000000001283c085 9. /src/ch/clickhouse/src/Common/Config/ConfigReloader.cpp:33: DB::ConfigReloader::ConfigReloader(std::basic_string_view>, std::vector> const&, String const&, zkutil::ZooKeeperNodeCache&&, std::shared_ptr const&, std::function, bool)>&&, bool) @ 0x000000001283b457 10. /src/ch/clickhouse/contrib/llvm-project/libcxx/include/__functional/function.h:818: ? @ 0x000000000b686ecd 11. /src/ch/clickhouse/base/poco/Util/src/Application.cpp:0: Poco::Util::Application::run() @ 0x0000000014afb156 12. /src/ch/clickhouse/programs/keeper/Keeper.cpp:165: DB::Keeper::run() @ 0x000000000b68317e 13. 
/src/ch/clickhouse/base/poco/Util/src/ServerApplication.cpp:132: Poco::Util::ServerApplication::run(int, char**) @ 0x0000000014b0faf2 14. /src/ch/clickhouse/programs/keeper/Keeper.cpp:0: mainEntryClickHouseKeeper(int, char**) @ 0x000000000b68227e 15. /src/ch/clickhouse/programs/main.cpp:0: main @ 0x00000000061d6204 16. ? @ 0x00007ffff7dc2c88 17. ? @ 0x00007ffff7dc2d4c 18. _start @ 0x00000000061d502e Signed-off-by: Azat Khuzhin --- programs/keeper/Keeper.cpp | 57 +++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 0d3c1f10894..ef215911e80 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -272,6 +272,35 @@ HTTPContextPtr httpContext() return std::make_shared(Context::getGlobalContextInstance()); } +String getKeeperPath(Poco::Util::LayeredConfiguration & config) +{ + String path; + if (config.has("keeper_server.storage_path")) + { + path = config.getString("keeper_server.storage_path"); + } + else if (config.has("keeper_server.log_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path(); + } + else if (config.has("keeper_server.snapshot_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } + else + { + path = KEEPER_DEFAULT_PATH; + } + return path; +} + + } int Keeper::main(const std::vector & /*args*/) @@ -321,31 +350,7 @@ try updateMemorySoftLimitInConfig(config()); - std::string path; - - if (config().has("keeper_server.storage_path")) - { - path = config().getString("keeper_server.storage_path"); - } - else if (config().has("keeper_server.log_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); - } - else if (config().has("keeper_server.snapshot_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); - } - else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", - KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); - } - else - { - path = KEEPER_DEFAULT_PATH; - } - + std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. 
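// [Editor's illustrative sketch - not part of this patch.] The new getKeeperPath() keeps
// the precedence of the inlined logic it replaces: an explicit keeper_server.storage_path
// wins, otherwise the parent directory of log_storage_path or snapshot_storage_path is
// used, otherwise the built-in default. The sketch below shows that precedence only; it
// omits the "coordination directory already exists" error branch, and Config plus
// resolveKeeperPath are hypothetical simplifications, not the real Poco configuration API.
#include <filesystem>
#include <map>
#include <string>

using Config = std::map<std::string, std::string>;  // stand-in for Poco::Util::LayeredConfiguration

std::filesystem::path resolveKeeperPath(const Config & config, const std::filesystem::path & default_path)
{
    if (auto it = config.find("keeper_server.storage_path"); it != config.end())
        return it->second;
    if (auto it = config.find("keeper_server.log_storage_path"); it != config.end())
        return std::filesystem::path(it->second).parent_path();
    if (auto it = config.find("keeper_server.snapshot_storage_path"); it != config.end())
        return std::filesystem::path(it->second).parent_path();
    return default_path;  // KEEPER_DEFAULT_PATH in the real code
}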
@@ -562,7 +567,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", KEEPER_DEFAULT_PATH), + getKeeperPath(config()), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) From 556c7deeff11e404630948adf3bbd171170dd3eb Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 24 Jun 2024 01:19:50 +0000 Subject: [PATCH 149/439] add drop option in lightweight delete on table with projections --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 91 ++++++++++++++----- ...61_lightweight_delete_projection.reference | 2 + .../03161_lightweight_delete_projection.sql | 11 ++- 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3e83092a77..d85edcdae1f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -608,6 +608,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ + M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. 
This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 69bc8c5d207..abad02f67c3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -105,6 +105,7 @@ static const std::maplockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) + auto lightweightDelete = [&]() { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -105,17 +88,77 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); + }; + + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) + { + return lightweightDelete(); } else { - /// Currently just better exception for the case of a table with projection, - /// can act differently according to the setting. if (table->hasProjection()) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); + auto context = Context::createCopy(getContext()); + auto mode = Field(context->getSettingsRef().lightweight_mutation_projection_mode); + if (mode == "throw") + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. 
" + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == "drop") + { + std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); + + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); + + /// Drop projections first so that lightweight delete can be performed. + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " DROP PROJECTION IF EXISTS " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); + } + + return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..15832d4cdfa 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,2 @@ +8888 Alice 50 +1231 John 33 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index cd29fae8fd7..786f6a3cc34 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -5,7 +5,8 @@ CREATE TABLE users ( uid Int16, name String, age Int16, - projection p1 (select count(), age group by age) + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); @@ -13,3 +14,11 @@ INSERT INTO users VALUES (6666, 'Ksenia', 48); INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +SELECT * FROM users; + +DROP TABLE users; From 636f2506f01e040c450d77bca29dfa8811f00575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jun 2024 11:35:29 +0200 Subject: [PATCH 150/439] Silence tidy --- base/base/itoa.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 0997daebbf6..c17a2bfd999 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -59,8 +59,8 @@ namespace jeaiii struct pair { char dd[2]; - constexpr pair(char c) : dd{c, '\0'} { } - constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } + constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor) + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor) }; constexpr 
struct From bdac9d6c24164adc85b5c90f41b58b4d6416daed Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 24 Jun 2024 09:58:00 +0000 Subject: [PATCH 151/439] Fix build bug --- src/Disks/DiskSelector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 6e57bdad77e..f3b4893e820 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -26,7 +26,7 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::( +void DiskSelector::initialize( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; From e20136ce25f85d463f4e3a033ac0a2a1d97431e5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jun 2024 17:29:41 +0100 Subject: [PATCH 152/439] fix test --- tests/integration/test_memory_limit_observer/test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index d8c2a0e8ad7..f19e119c019 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -54,10 +54,13 @@ def test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): - # populate page cache with 10GB of data - node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] - ) + try: + # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + except Exception: + pass observer_refresh_period = int( node1.query( From b1f87c578161578a03ee99ab87899cf4deb2c2ef Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 21:04:04 +0000 Subject: [PATCH 153/439] return back settings and fix build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 17 +++-- src/Core/Settings.h | 6 ++ .../AzureBlobStorageCommon.cpp | 68 ++++++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 9 ++- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- .../ObjectStorage/Azure/Configuration.cpp | 12 +++- 6 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 596c308ca8a..0ee0160a969 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -41,18 +41,17 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , blob_path(blob_path_) { auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); object_storage = std::make_unique( "BackupReaderAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -122,8 +121,8 @@ void 
BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const AzureBlobStorage::ConnectionParams & connection_params_, - bool allow_azure_native_copy, const String & blob_path_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, @@ -137,17 +136,17 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( connection_params.endpoint.container_already_exists = true; auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + object_storage = std::make_unique( "BackupWriterAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk( diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ebdb6860986..9d3fedc3063 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -112,16 +112,22 @@ class IColumn; M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ + M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ + M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. 
It may lead to slightly higher memory usage", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index c2e4bc0dc89..d9dfedadd48 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -197,7 +197,7 @@ void processURL(const String & url, const String & container_name, Endpoint & en return; } - size_t pos = url.find('?'); + auto pos = url.find('?'); /// If conneciton_url does not have '?', then its not SAS if (pos == std::string::npos) @@ -273,42 +273,60 @@ BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_di std::unique_ptr getRequestSettings(const Settings & query_settings) { - auto settings_ptr = std::make_unique(); + auto settings = std::make_unique(); - settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + 
settings->max_upload_part_size = query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; - settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; - settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; - settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + return settings; +} - return settings_ptr; +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; } std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - 
settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); - settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); - settings->sdk_retry_max_backoff_ms = config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); if (config.has(config_prefix + ".curl_ip_resolve")) { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 5f9f280ad4a..19ba48ea225 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ 
-32,22 +32,23 @@ struct RequestSettings RequestSettings() = default; size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; + size_t min_bytes_for_seek = 1024 * 1024; size_t max_single_read_retries = 3; size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; + size_t list_object_keys_size = 1000; size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; size_t strict_upload_part_size = 0; size_t upload_part_size_multiply_factor = 2; size_t upload_part_size_multiply_parts_count_threshold = 500; size_t sdk_max_retries = 10; size_t sdk_retry_initial_backoff_ms = 10; size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy = false; using CurlOptions = Azure::Core::Http::CurlTransportOptions; CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; @@ -125,7 +126,9 @@ std::unique_ptr getContainerClient(const ConnectionParams & par BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 3f32b4b410e..092277aca50 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -332,8 +332,9 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) ObjectStorageType::Azure, config, config_prefix, name, AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), params.endpoint.prefix.empty() ? 
params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index e4b3d61f659..595d4da3609 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -73,10 +73,16 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } static AzureBlobStorage::ConnectionParams getConnectionParams( From 51f300356e0c0df9c7d001ffe0b7967c7d9f438e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 23:57:33 +0000 Subject: [PATCH 154/439] fix style --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d3fedc3063..067e46226ea 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -126,8 +126,8 @@ class IColumn; M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ From 8d518151242d4ff385a97d35946b971803df4e16 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 25 Jun 2024 12:13:44 +0200 Subject: [PATCH 155/439] Fix --- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Planner/PlannerJoinTree.cpp | 7 ++--- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../test_parallel_replicas_custom_key/test.py | 1 + .../test.py | 28 ++++++++----------- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9b51b3a82f6..bc0635affe9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -600,6 +600,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( } else if ( storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree) + && context->getClientInfo().distributed_depth == 0 && context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas())) { context->setSetting("prefer_localhost_replica", Field(0)); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a2c6a478ac4..c85e6cd0cd1 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -907,11 +907,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } chassert(reading); - if (query_context->canUseParallelReplicasCustomKey()) + if (query_context->canUseParallelReplicasCustomKey() && query_context->getClientInfo().distributed_depth == 0) { - auto cluster = query_context->getClusterForParallelReplicas(); - if (query_context->canUseParallelReplicasCustomKeyForCluster(*cluster) - && query_context->getClientInfo().distributed_depth == 0) + if (auto cluster = query_context->getClusterForParallelReplicas(); + query_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); auto modified_query_info = select_query_info; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 37f8de60eca..70d6041f0e2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7081,7 +7081,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( if (query_context->getClientInfo().distributed_depth > 0) return QueryProcessingStage::FetchColumns; - if (!settings.parallel_replicas_for_non_replicated_merge_tree) + if (!supportsReplication() && !settings.parallel_replicas_for_non_replicated_merge_tree) return QueryProcessingStage::Enum::FetchColumns; if (to_stage >= QueryProcessingStage::WithMergeableState diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index cb2c002f237..9a2480a77c3 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -91,6 +91,7 @@ def test_parallel_replicas_custom_key_distributed( "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, + "prefer_localhost_replica": 0, }, ) == expected_result diff --git 
a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 3ba3ce092c3..5c1c8ef71ab 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -53,13 +53,11 @@ def create_tables(cluster, table_name): @pytest.mark.parametrize("use_hedged_requests", [1, 0]) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @pytest.mark.parametrize("filter_type", ["default", "range"]) -@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) def test_parallel_replicas_custom_key_failover( start_cluster, use_hedged_requests, custom_key, filter_type, - prefer_localhost_replica, ): cluster_name = "test_single_shard_multiple_replicas" table = "test_table" @@ -76,7 +74,6 @@ def test_parallel_replicas_custom_key_failover( f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", settings={ "log_comment": log_comment, - "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, @@ -100,20 +97,19 @@ def test_parallel_replicas_custom_key_failover( assert query_id != "" query_id = query_id[:-1] - if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # With enabled hedged requests, we can't guarantee exact query distribution among nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: assert ( node1.query( - f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" ) - == "subqueries\t4\n" + == "n1\t3\nn3\t2\n" ) - - # With enabled hedged requests, we can't guarantee exact query distribution among nodes - # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice - if use_hedged_requests == 0: - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" - ) - == "n1\t3\nn3\t2\n" - ) From 615cd96c6e6893973b14cc0190e510894b747b22 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 25 Jun 2024 12:48:43 +0100 Subject: [PATCH 156/439] fix test --- tests/integration/test_memory_limit_observer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index f19e119c019..2840c830396 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -55,9 +55,9 @@ def 
test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): try: - # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"] ) except Exception: pass @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2**30 + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From 1b2a0036da6f20a74f5ad6a700559f0573c77136 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 25 Jun 2024 16:29:57 +0000 Subject: [PATCH 157/439] Fixed tests --- .../02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference | 4 ++-- .../02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 21b38a94cee..a2dd196083e 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -3,7 +3,7 @@ data after ATTACH 1 Files before DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: checksums.txt columns.txt count.txt @@ -17,7 +17,7 @@ serialization.json Files after DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: checksums.txt columns.txt count.txt diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index 7725688d225..eec05c81344 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,7 +55,6 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -echo "Path $path" clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " @@ -63,7 +62,6 @@ $CLICKHOUSE_CLIENT -nm -q " detach table data_write; " echo "Files after DETACH TABLE" -echo "Path $path" clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left From 5da4b69d5745d7187e6a21a14793d7355590423b Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 25 Jun 2024 17:20:42 +0000 Subject: [PATCH 158/439] Tried to fix tests --- programs/disks/DisksClient.cpp | 16 +++++++++------- programs/disks/DisksClient.h | 5 +---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index e38f7ec99b8..379c87e4f2f 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -29,15 +29,17 @@ 
DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(di path = String{"/"}; } - if (!disk->isDirectory(normalizePathAndGetAsRelative(path))) + String relative_path = normalizePathAndGetAsRelative(path); + if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/")))) { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Initializing path {} (normalized path: {}) at disk {} is not a directory", - path, - normalizePathAndGetAsRelative(path), - disk->getName()); + return; } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} is not a directory", + path, + relative_path, + disk->getName()); } std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index ab99d2f6590..3320c5f7cef 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -32,10 +32,7 @@ public: String getCurrentPath() const { return path; } - bool isDirectory(const String & any_path) const - { - return disk->isDirectory(getRelativeFromRoot(any_path)) || disk->isDirectory(getAbsolutePath(any_path)); - } + bool isDirectory(const String & any_path) const { return disk->isDirectory(getRelativeFromRoot(any_path)); } std::vector listAllFilesByPath(const String & any_path) const; From d9f681b39d22a99f8ace10f39ed308621b27a774 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 26 Jun 2024 10:44:17 +0200 Subject: [PATCH 159/439] Disable stacktrace collection in GWPAsan by default --- src/Common/GWPAsan.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 488f8e2c5dc..ea376609ff4 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,9 +57,12 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 50000; + opts.SampleRate = 5000; + + const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) + if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") + opts.Backtrace = getBackTrace; - opts.Backtrace = getBackTrace; GuardedAlloc.init(opts); return true; From c30ecee10c8cba5d245916d458ddd60345ea359c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jun 2024 12:40:55 +0000 Subject: [PATCH 160/439] remove unused code --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 2 +- src/Storages/ObjectStorage/Azure/Configuration.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index baad3bdf223..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,5 +1,4 @@ #pragma once -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -9,6 +8,7 @@ #include #include #include +#include namespace Poco { diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 272d155e337..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -57,9 +57,6 @@ protected: std::string blob_path; 
std::vector blobs_paths; AzureBlobStorage::ConnectionParams connection_params; - - // AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - // AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); }; } From e8e1cd79e0325a6ab1a73b59fe9ab830bb8561ac Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 26 Jun 2024 13:17:39 +0000 Subject: [PATCH 161/439] Corrected integration tests --- .../test_backup_restore_s3/test.py | 273 +++--------------- tests/integration/test_disk_types/test.py | 13 +- .../test_endpoint_macro_substitution/test.py | 7 +- 3 files changed, 45 insertions(+), 248 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index bd600a06c7f..05424887736 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -3,11 +3,8 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV import uuid -import os -CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") - cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", @@ -23,127 +20,13 @@ node = cluster.add_instance( ], with_minio=True, with_zookeeper=True, - stay_alive=True, ) -def setup_minio_users(): - # create 2 extra users with restricted access - # miniorestricted1 - full access to bucket 'root', no access to other buckets - # miniorestricted2 - full access to bucket 'root2', no access to other buckets - # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user - for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: - print( - cluster.exec_in_container( - cluster.minio_docker_id, - [ - "mc", - "alias", - "set", - "root", - "http://minio1:9001", - "minio", - "minio123", - ], - ) - ) - policy = f""" -{{ - "Version": "2012-10-17", - "Statement": [ - {{ - "Effect": "Allow", - "Principal": {{ - "AWS": [ - "*" - ] - }}, - "Action": [ - "s3:GetBucketLocation", - "s3:ListBucket", - "s3:ListBucketMultipartUploads" - ], - "Resource": [ - "arn:aws:s3:::{bucket}" - ] - }}, - {{ - "Effect": "Allow", - "Principal": {{ - "AWS": [ - "*" - ] - }}, - "Action": [ - "s3:AbortMultipartUpload", - "s3:DeleteObject", - "s3:GetObject", - "s3:ListMultipartUploadParts", - "s3:PutObject" - ], - "Resource": [ - "arn:aws:s3:::{bucket}/*" - ] - }} - ] -}}""" - - cluster.exec_in_container( - cluster.minio_docker_id, - ["bash", "-c", f"cat >/tmp/{bucket}_policy.json < Date: Wed, 26 Jun 2024 13:25:34 +0000 Subject: [PATCH 162/439] Revert some tests --- tests/integration/test_multiple_disks/test.py | 16 +++++++++------- tests/integration/test_store_cleanup/test.py | 5 +---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 32013dd0757..fdd81284b2a 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -83,7 +83,6 @@ def test_system_tables(start_cluster): "path": "/external/", "keep_free_space": "0", }, - {"name": "local", "path": "/", "keep_free_space": "0"}, ] click_disk_data = json.loads( @@ -1784,12 +1783,15 @@ def test_move_across_policies_does_not_work(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - # works when attach - node1.query( - """ALTER TABLE {name}2 ATTACH 
PARTITION tuple() FROM {name}""".format( - name=name + with pytest.raises( + QueryRuntimeException, + match=".*because disk does not belong to storage policy.*", + ): + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name + ) ) - ) with pytest.raises( QueryRuntimeException, @@ -1812,7 +1814,7 @@ def test_move_across_policies_does_not_work(start_cluster): ) assert node1.query( - """SELECT * FROM {name}2""".format(name=name) + """SELECT * FROM {name}""".format(name=name) ).splitlines() == ["1"] finally: diff --git a/tests/integration/test_store_cleanup/test.py b/tests/integration/test_store_cleanup/test.py index aebfde694b3..6c5a20a758a 100644 --- a/tests/integration/test_store_cleanup/test.py +++ b/tests/integration/test_store_cleanup/test.py @@ -1,4 +1,3 @@ -from time import sleep import pytest from helpers.cluster import ClickHouseCluster @@ -154,9 +153,7 @@ def test_store_cleanup(started_cluster): "directories from store", timeout=90, look_behind_lines=1000000 ) node1.wait_for_log_line( - "Nothing to clean up from store/ on disk default", - timeout=90, - look_behind_lines=1000000, + "Nothing to clean up from store/", timeout=90, look_behind_lines=1000000 ) store = node1.exec_in_container(["ls", f"{path_to_data}/store"]) From cef503cb4c6b27cc580d948e8b0e3688997e99ff Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 26 Jun 2024 13:47:15 +0000 Subject: [PATCH 163/439] Revert test --- .../test_backup_restore_s3/test.py | 248 ++++++++++++++++-- 1 file changed, 222 insertions(+), 26 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 05424887736..d53335000a6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -3,8 +3,11 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV import uuid +import os +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", @@ -20,13 +23,127 @@ node = cluster.add_instance( ], with_minio=True, with_zookeeper=True, + stay_alive=True, ) +def setup_minio_users(): + # create 2 extra users with restricted access + # miniorestricted1 - full access to bucket 'root', no access to other buckets + # miniorestricted2 - full access to bucket 'root2', no access to other buckets + # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user + for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: + print( + cluster.exec_in_container( + cluster.minio_docker_id, + [ + "mc", + "alias", + "set", + "root", + "http://minio1:9001", + "minio", + "minio123", + ], + ) + ) + policy = f""" +{{ + "Version": "2012-10-17", + "Statement": [ + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + "s3:ListBucketMultipartUploads" + ], + "Resource": [ + "arn:aws:s3:::{bucket}" + ] + }}, + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:AbortMultipartUpload", + "s3:DeleteObject", + "s3:GetObject", + "s3:ListMultipartUploadParts", + "s3:PutObject" + ], + "Resource": [ + "arn:aws:s3:::{bucket}/*" + ] + }} + ] +}}""" + + cluster.exec_in_container( + cluster.minio_docker_id, + ["bash", "-c", f"cat >/tmp/{bucket}_policy.json < Date: Wed, 26 Jun 2024 14:18:14 +0000 
Subject: [PATCH 164/439] do not optimize with group_by_use_nulls --- .../Passes/FunctionToSubcolumnsPass.cpp | 20 +++++++++++++------ ...71_function_to_subcolumns_fuzzer.reference | 6 ++++++ .../03171_function_to_subcolumns_fuzzer.sql | 10 ++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index bc2028e1b43..90051779a26 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -273,18 +273,26 @@ public: if (const auto * join_node = node->as()) { - has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls; + return; + } + + if (const auto * query_node = node->as()) + { + if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets()) + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls; return; } } std::unordered_set getIdentifiersToOptimize() const { - if (has_join_use_nulls) + if (can_wrap_result_columns_with_nullable) { /// Do not optimize if we have JOIN with setting join_use_null. + /// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls. /// It may change the behaviour if subcolumn can be converted - /// to nullable while the original column cannot. + /// to Nullable while the original column cannot (e.g. for Array type). return {}; } @@ -323,7 +331,7 @@ private: std::unordered_map optimized_identifiers_count; NameSet processed_tables; - bool has_join_use_nulls = false; + bool can_wrap_result_columns_with_nullable = false; void enterImpl(const TableNode & table_node) { @@ -342,9 +350,9 @@ private: const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + + add_key_columns(primary_key_columns); add_key_columns(partition_key_columns); for (const auto & index : metadata_snapshot->getSecondaryIndices()) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference index be47c4ab571..1fc6683620c 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -1,3 +1,9 @@ 1 2 1 3 0 +0 450 +1 460 +2 470 +3 480 +4 490 +\N 4950 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql index 587288bbfdf..f10019a78dd 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -37,3 +37,13 @@ FULL OUTER JOIN WHERE empty(arr); DROP TABLE t_func_to_subcolumns_join; + +DROP TABLE IF EXISTS t_func_to_subcolumns_use_nulls; + +CREATE TABLE t_func_to_subcolumns_use_nulls (arr Array(UInt64), v UInt64) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_use_nulls SELECT range(number % 10), number FROM numbers(100); + +SELECT length(arr) AS n, sum(v) FROM t_func_to_subcolumns_use_nulls GROUP BY n WITH ROLLUP HAVING n <= 4 OR isNull(n) ORDER BY n SETTINGS 
group_by_use_nulls = 1; + +DROP TABLE t_func_to_subcolumns_use_nulls; From 5cebec8152e1a755874164b70362aa2ee0f57423 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 26 Jun 2024 15:59:15 +0000 Subject: [PATCH 165/439] fix some issues --- .../operations/utilities/clickhouse-disks.md | 2 +- programs/disks/CMakeLists.txt | 1 + programs/disks/CommandTouch.cpp | 34 +++++++++++++++++++ programs/disks/CommandWrite.cpp | 2 +- programs/disks/DisksApp.cpp | 4 +++ programs/disks/DisksApp.h | 2 ++ programs/disks/DisksClient.h | 5 ++- programs/disks/ICommand.h | 1 + 8 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 programs/disks/CommandTouch.cpp diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index aeca49c0e1e..e22bc06b641 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -56,4 +56,4 @@ In these documentation file all mandatory positional arguments are referred as ` * `switch-disk [--path path] ` Switch to disk `disk` on path `path` (if `path` is not specified default value is a previous path on disk `disk`). * `write (w) [--path-from path] `. - Write a file from `path` (`stdin` if not supplied) to `path-to`. + Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`. diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index 40f9cf3401c..7e8afe084fb 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -14,6 +14,7 @@ set (CLICKHOUSE_DISKS_SOURCES CommandSwitchDisk.cpp CommandWrite.cpp CommandHelp.cpp + CommandTouch.cpp CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) diff --git a/programs/disks/CommandTouch.cpp b/programs/disks/CommandTouch.cpp new file mode 100644 index 00000000000..c0bdb64cf9e --- /dev/null +++ b/programs/disks/CommandTouch.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandTouch final : public ICommand +{ +public: + explicit CommandTouch() : ICommand() + { + command_name = "touch"; + description = "Create a file by path"; + options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + + disk.getDisk()->createFile(disk.getRelativeFromRoot(path)); + } +}; + +CommandPtr makeCommandTouch() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 433ebb3d5cf..ffe27f37c0e 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -16,7 +16,7 @@ public: { command_name = "write"; description = "Write a file from `FROM_PATH` to `TO_PATH`"; - options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin`")( + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")( "path-to", po::value(), "file to which we are writing (mandatory, positional)"); positional_options_description.add("path-to", 1); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 9ef051a2ece..392fca8e035 100644 
--- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -89,6 +89,7 @@ std::vector DisksApp::getCommandsToComplete(const String & command_prefi } if (!answer.empty()) { + std::sort(answer.begin(), answer.end()); return answer; } for (const auto & [word, _] : aliases) @@ -100,6 +101,7 @@ std::vector DisksApp::getCommandsToComplete(const String & command_prefi } if (!answer.empty()) { + std::sort(answer.begin(), answer.end()); return answer; } return {command_prefix}; @@ -179,6 +181,7 @@ std::vector DisksApp::getCompletions(const String & prefix) const } if (!answer.empty()) { + std::sort(answer.begin(), answer.end()); return answer; } else @@ -292,6 +295,7 @@ void DisksApp::addOptions() command_descriptions.emplace("mkdir", makeCommandMkDir()); command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); + command_descriptions.emplace("touch", makeCommandTouch()); command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 1ecd9944fb8..f8167884c62 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -84,8 +84,10 @@ private: {"list_disks", "list-disks"}, {"ln", "link"}, {"rm", "remove"}, + {"cat", "read"}, {"r", "read"}, {"w", "write"}, + {"create", "touch"}, {"delete", "remove"}, {"ls-disks", "list-disks"}, {"ls_disks", "list-disks"}, diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h index 3320c5f7cef..8a55d22af93 100644 --- a/programs/disks/DisksClient.h +++ b/programs/disks/DisksClient.h @@ -32,7 +32,10 @@ public: String getCurrentPath() const { return path; } - bool isDirectory(const String & any_path) const { return disk->isDirectory(getRelativeFromRoot(any_path)); } + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/"))); + } std::vector listAllFilesByPath(const String & any_path) const; diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index 2b409d4ade6..4b0ec731966 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -123,6 +123,7 @@ DB::CommandPtr makeCommandMkDir(); DB::CommandPtr makeCommandSwitchDisk(); DB::CommandPtr makeCommandGetCurrentDiskAndPath(); DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); +DB::CommandPtr makeCommandTouch(); #ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); #endif From 4377a1f4edc6642583110c1597d0c72e30613aa2 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 26 Jun 2024 16:12:55 +0000 Subject: [PATCH 166/439] Take test from master --- tests/integration/test_multiple_disks/test.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fdd81284b2a..e97ffeb4cc3 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1783,15 +1783,12 @@ def test_move_across_policies_does_not_work(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - with pytest.raises( - QueryRuntimeException, - match=".*because disk does not belong to storage policy.*", - ): - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM 
{name}""".format( - name=name - ) + # works when attach + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name ) + ) with pytest.raises( QueryRuntimeException, @@ -1814,7 +1811,7 @@ def test_move_across_policies_does_not_work(start_cluster): ) assert node1.query( - """SELECT * FROM {name}""".format(name=name) + """SELECT * FROM {name}2""".format(name=name) ).splitlines() == ["1"] finally: From a67d468eaa33bba0a82073f37a0d17eefd5a3e86 Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 26 Jun 2024 17:25:43 +0000 Subject: [PATCH 167/439] Forbid unregistered options --- programs/disks/ICommand.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 0c149a8f9df..f622bcad3c6 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -14,7 +14,7 @@ CommandLineOptions ICommand::processCommandLineArguments(const Strings & command { CommandLineOptions options; auto parser = po::command_line_parser(commands); - parser.options(options_description).positional(positional_options_description).allow_unregistered(); + parser.options(options_description).positional(positional_options_description); po::parsed_options parsed = parser.run(); po::store(parsed, options); From b06eac085bddc3f0ed6a2f5d2ac0a4ddcdd8259c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 20:01:17 +0200 Subject: [PATCH 168/439] work with review --- src/Common/CollectionOfDerived.h | 21 +- src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCreateQuery.cpp | 6 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/SystemLog.cpp | 8 +- .../DeduplicationTokenTransforms.cpp | 56 +- .../Transforms/DeduplicationTokenTransforms.h | 22 +- .../Transforms/buildPushingToViewsChain.cpp | 22 +- src/Storages/Distributed/DistributedSink.cpp | 16 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/HDFS/StorageHDFS.cpp | 1207 ----------------- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 24 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 - src/Storages/NATS/StorageNATS.cpp | 8 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/WindowView/StorageWindowView.cpp | 17 +- 24 files changed, 220 insertions(+), 1284 deletions(-) delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 60a91e593f9..97c0c3fbc06 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -2,6 +2,8 @@ #include +#include + #include #include #include @@ -12,6 +14,16 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* This is a collections of objects derived from ItemBase. +* Collection contains no more than one instance for each derived type. +* The derived type is used to access the instance. 
+*/ + template class CollectionOfDerivedItems { @@ -67,15 +79,16 @@ public: { Self result; result.records.reserve(records.size()); - for (const auto & rec: records) + for (const auto & rec : records) result.records.emplace_back(rec.type_idx, rec.ptr->clone()); return result; } void append(Self && other) { + auto middle_idx = records.size(); std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::sort(records.begin(), records.end()); + std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); chassert(isUniqTypes()); } @@ -143,7 +156,9 @@ private: return; } - chassert(it->type_idx != type_idx); + if (it->type_idx == type_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); + records.emplace(it, type_idx, item); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a272456470a..c400873a47c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -632,7 +632,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -948,6 +947,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 94c024ba786..dd1166a9228 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, false, false, false); + InterpreterInsertQuery interpreter( + query, + query_context, + query_context->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ef222a6842f..dbbdb546260 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1750,9 +1750,9 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, - false, - false, - false).execute(); + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index b837490dad9..8392f0541f1 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -524,7 +524,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); + InterpreterInsertQuery insert( + ast.getExplainedQuery(), + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 8f97b6ea263..8d4882372ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -537,7 +537,13 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + query_ptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 0701e958877..23e32415f6a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ 
b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -26,10 +26,16 @@ void RestoreChunkInfosTransform::transform(Chunk & chunk) namespace DeduplicationToken { -String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +String TokenInfo::getToken() const { - chassert(stage == VIEW_ID || !enable_assert); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + return getTokenImpl(); +} + +String TokenInfo::getTokenImpl() const +{ String result; result.reserve(getTotalSize()); @@ -43,13 +49,20 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const return result; } -void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) +String TokenInfo::debugToken() const { - chassert(stage == INITIAL); + return getTokenImpl(); +} + + +void TokenInfo::addPieceToInitialToken(String part) +{ + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(std::move(part)); } -void DB::DeduplicationToken::TokenInfo::closeInitialToken() +void TokenInfo::closeInitialToken() { chassert(stage == INITIAL); stage = VIEW_ID; @@ -57,29 +70,37 @@ void DB::DeduplicationToken::TokenInfo::closeInitialToken() void TokenInfo::setUserToken(const String & token) { - chassert(stage == INITIAL); + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("user-token-{}", token)); stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t sbn) +void TokenInfo::setSourceBlockNumber(size_t block_number) { - chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format("source-number-{}", sbn)); + if (stage != SOURCE_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("source-number-{}", block_number)); stage = VIEW_ID; } void TokenInfo::setViewID(const String & id) { - chassert(stage == VIEW_ID); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t mvbn) +void TokenInfo::setViewBlockNumber(size_t block_number) { - chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format("view-block-{}", mvbn)); + if (stage != VIEW_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-block-{}", block_number)); stage = VIEW_ID; } @@ -91,8 +112,7 @@ void TokenInfo::reset() void TokenInfo::addTokenPart(String part) { - if (!part.empty()) - parts.push_back(std::move(part)); + parts.push_back(std::move(part)); } size_t TokenInfo::getTotalSize() const @@ -107,6 +127,7 @@ size_t TokenInfo::getTotalSize() const return size + parts.size() - 1; } +#ifdef ABORT_ON_LOGICAL_ERROR void CheckTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -116,12 +137,13 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!must_be_present) { - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); return; } - 
LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); } +#endif String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) { diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 27bb21dfad1..ebbbb0f7590 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -4,6 +4,7 @@ #include #include +#include "Common/Logger.h" namespace DB @@ -33,7 +34,8 @@ namespace DeduplicationToken TokenInfo() = default; TokenInfo(const TokenInfo & other) = default; - String getToken(bool enable_assert = true) const; + String getToken() const; + String debugToken() const; bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } @@ -41,15 +43,25 @@ namespace DeduplicationToken void addPieceToInitialToken(String part); void closeInitialToken(); void setUserToken(const String & token); - void setSourceBlockNumber(size_t sbn); + void setSourceBlockNumber(size_t block_number); void setViewID(const String & id); - void setViewBlockNumber(size_t mvbn); + void setViewBlockNumber(size_t block_number); void reset(); private: + String getTokenImpl() const; + void addTokenPart(String part); size_t getTotalSize() const; + /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. + * Firstly token has expand with information about the souce. + * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. + * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. + * After that token is considered as prepared for usage, hovewer it could be expanded with following details: + * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. + * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. 
+ */ enum BuildingStage { INITIAL, @@ -63,6 +75,8 @@ namespace DeduplicationToken }; +#ifdef ABORT_ON_LOGICAL_ERROR + /// use that class only with debug builds in CI for introspection class CheckTokenTransform : public ISimpleTransform { public: @@ -79,8 +93,10 @@ namespace DeduplicationToken private: String debug; + LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); bool must_be_present = false; }; +#endif class AddTokenInfoTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index aba28391879..713ab25600f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "base/defines.h" #include #include @@ -225,7 +226,6 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); - insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -333,7 +333,13 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + nullptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -350,7 +356,9 @@ std::optional generateViewChain( table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); } +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,7 +402,9 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); @@ -402,7 +412,9 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif } return out; @@ -459,8 +471,6 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); - try { auto out = generateViewChain( @@ -569,7 +579,7 @@ Chain buildPushingToViewsChain( } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) @@ -586,7 +596,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) { const auto & context = view.context; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 2e3096683d0..8791668cd89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -420,7 +420,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + copy_query_ast, + job.local_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +721,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + query_ast, + context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b86845d48e0..0f9bd8b6ff9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -743,10 +743,11 @@ bool StorageFileLog::streamToViews() 
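// Editor's sketch (not part of the patch): the hunks above and below all switch
// InterpreterInsertQuery construction to the same convention, annotating each positional
// bool literal with the parameter it binds to so the flags cannot be silently swapped.
// A minimal sketch, assuming the constructor's boolean parameters really are
// (allow_materialized, no_squash, no_destination, async_insert); the variable names are
// illustrative only.
InterpreterInsertQuery interpreter(
    insert_ast,
    insert_context,
    /* allow_materialized */ false,
    /* no_squash */ true,
    /* no_destination */ true,
    /* async_insert */ false);
auto block_io = interpreter.execute();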
InterpreterInsertQuery interpreter( insert, new_context, - false, - true, - true, - false); + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); + auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index 1ca7c1f71d0..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1207 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. - */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
- throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - 
columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, 
static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -HDFSSource::~HDFSSource() = default; - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const String & uri, size_t sequence_number) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - if (context->getSettingsRef().hdfs_truncate_on_insert || hdfsExists(fs.get(), path_from_uri.c_str())) - return std::nullopt; - - if (context->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uri.find_first_of('.', uri.find_last_of('/')); - String new_uri; - do - { - new_uri = uri.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : uri.substr(pos)); - ++sequence_number; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - - return new_uri; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } -} - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(context, path, 1)) - path = *new_path; - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo 
info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.front(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - String path = current_uri.substr(current_uri.find('/', current_uri.find("//") + 2)); - if (PartitionedSink::replaceWildcards(path, "").find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - if (auto new_uri = checkAndGetNewFileOnInsertIfNeeded(context_, uris.front(), uris.size())) - { - uris.push_back(*new_uri); - current_uri = *new_uri; - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - 
.supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index f92c6ae67c9..809401bb279 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 57d75b969c3..ff5214a5e51 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(); - if (storage.getDeduplicationLog()) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - - if (token_info->tokenInitialized()) - block_dedup_token = token_info->getToken(); - } + String block_dedup_token; + if (token_info->tokenInitialized()) + block_dedup_token = token_info->getToken(); for (auto & current_block : part_blocks) { @@ -161,7 +152,6 @@ void MergeTreeSink::consume(Chunk & chunk) partitions = DelayedPartitions{}; } - /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index cf3af59118e..b15b80864e5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,12 +304,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {} user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - if 
(token_info->tokenInitialized()) block_dedup_token = token_info->getToken(); } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 9c6d70f2c5b..8f0e2d76473 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,13 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + nats_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 57c8d24ccc2..44479bd01e2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,13 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ true, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 4a5a621aa43..f632e553a0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,13 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5bf5ab9b2f5..f3d2aff68c8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,13 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); + InterpreterInsertQuery interpreter( + insert, + rabbitmq_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b9aa7881bdd..d1607843364 
100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -477,7 +477,13 @@ bool StorageS3Queue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + s3queue_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 10eecd63e3c..b064fba223a 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1020,7 +1020,13 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1c129e34170..67586985ce8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,7 +1050,13 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); + InterpreterInsertQuery interpreter( + new_query, + query_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 65aa06f8506..b1dd5f8a114 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -690,7 +690,13 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1548,11 +1554,12 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared(stream_header); }); +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); }); - +#endif builder.addSimpleTransform([&](const Block & current_header) { @@ -1593,10 +1600,12 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); +#ifdef ABORT_ON_LOGICAL_ERROR 
builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); }); +#endif auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( @@ -1617,10 +1626,12 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Before out", true, stream_header); }); +#endif builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) From a38f8d6c459ed597ce60de0108ad79dac6044b37 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 21:03:53 +0200 Subject: [PATCH 169/439] rework TokenInfo::BuildingStage --- .../DeduplicationTokenTransforms.cpp | 61 +++++++++++-------- .../Transforms/DeduplicationTokenTransforms.h | 54 ++++++++++------ src/Storages/MergeTree/MergeTreeSink.cpp | 10 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 10 +-- 4 files changed, 82 insertions(+), 53 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 23e32415f6a..10c21249ebc 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -28,8 +28,8 @@ namespace DeduplicationToken String TokenInfo::getToken() const { - if (stage != VIEW_ID) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + if (!isDefined()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); return getTokenImpl(); } @@ -54,59 +54,70 @@ String TokenInfo::debugToken() const return getTokenImpl(); } - -void TokenInfo::addPieceToInitialToken(String part) +void TokenInfo::addChunkHash(String part) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(std::move(part)); } -void TokenInfo::closeInitialToken() +void TokenInfo::defineSourceWithChunkHashes() { - chassert(stage == INITIAL); - stage = VIEW_ID; + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + stage = DEFINED; } void TokenInfo::setUserToken(const String & token) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_USER_TOKEN; + + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("user-token-{}", token)); - stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t block_number) +void TokenInfo::defineSourceWithUserToken(size_t block_number) { - if (stage != SOURCE_BLOCK_NUMBER) + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("source-number-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::setViewID(const String & id) { - if (stage != VIEW_ID) + if (stage == DEFINED) + stage = DEFINE_VIEW; + + if (stage != 
DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-id-{}", id)); - stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t block_number) +void TokenInfo::defineViewID(size_t block_number) { - if (stage != VIEW_BLOCK_NUMBER) + if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-block-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::reset() { - stage = INITIAL; + stage = UNDEFINED; parts.clear(); } @@ -145,7 +156,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,11 +176,11 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) return; - token_info->addPieceToInitialToken(getInitialToken(chunk)); - token_info->closeInitialToken(); + token_info->addChunkHash(getChunkHash(chunk)); + token_info->defineSourceWithChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -189,7 +200,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceBlockNumber(block_number++); + token_info->defineSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -209,7 +220,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); + token_info->defineViewID(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index ebbbb0f7590..416d4bb5f62 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,14 +38,18 @@ namespace DeduplicationToken String debugToken() const; bool empty() const { return parts.empty(); } - bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void addPieceToInitialToken(String part); - void closeInitialToken(); + bool isDefined() const { return stage == DEFINED; } + + void addChunkHash(String part); + void defineSourceWithChunkHashes(); + void setUserToken(const String & token); - void setSourceBlockNumber(size_t block_number); + void defineSourceWithUserToken(size_t block_number); + void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); + void defineViewID(size_t block_number); + void reset(); private: @@ -54,23 +58,37 @@ namespace DeduplicationToken void addTokenPart(String part); size_t getTotalSize() const; - /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. - * Firstly token has expand with information about the souce. 
- * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. - * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. - * After that token is considered as prepared for usage, hovewer it could be expanded with following details: - * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. - * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. + /* Token has to be prepared in a particular order. + * BuildingStage ensures that token is expanded according the foloving order. + * Firstly token is expanded with information about the source. + * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken + * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken + * + * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * + * transition // method + * DEFINED -> DEFINE_VIEW // setViewID + * DEFINE_VIEW -> DEFINED // defineViewID */ + enum BuildingStage { - INITIAL, - SOURCE_BLOCK_NUMBER, - VIEW_ID, - VIEW_BLOCK_NUMBER, + UNDEFINED, + DEFINE_SOURCE_WITH_HASHES, + DEFINE_SOURCE_USER_TOKEN, + DEFINE_VIEW, + DEFINED, }; - BuildingStage stage = INITIAL; + BuildingStage stage = UNDEFINED; std::vector parts; }; @@ -128,7 +146,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static String getInitialToken(const Chunk & chunk); + static String getChunkHash(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7cb89fa7239..532fa718efd 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -92,7 +92,7 @@ void MergeTreeSink::consume(Chunk & chunk) storage.getStorageID().getNameForLogs()); String block_dedup_token; - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); for (auto & current_block : part_blocks) @@ -119,10 +119,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -161,9 +161,9 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b15b80864e5..228b5c596ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ 
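To make the reworked BuildingStage contract above concrete, here is a minimal sketch of the two call sequences it permits (method and type names follow the patch; the transforms that normally drive these calls per chunk, and the hash, token and id values, are placeholders, so this illustrates the intended order rather than real pipeline code):

    using DB::DeduplicationToken::TokenInfo;

    TokenInfo token;

    /// Path 1: the source is described by hashes of the chunks it produced.
    token.addChunkHash("hash-of-part-1");         // UNDEFINED -> DEFINE_SOURCE_WITH_HASHES
    token.addChunkHash("hash-of-part-2");         // may be repeated for every part
    token.defineSourceWithChunkHashes();          // DEFINE_SOURCE_WITH_HASHES -> DEFINED

    /// Path 2 (alternative): the source is described by a user token plus a block number.
    TokenInfo user_token;
    user_token.setUserToken("user-supplied-token");            // UNDEFINED -> DEFINE_SOURCE_USER_TOKEN
    user_token.defineSourceWithUserToken(/*block_number*/ 0);  // DEFINE_SOURCE_USER_TOKEN -> DEFINED

    /// Once DEFINED, each view in the chain extends the token and brings it back to DEFINED.
    token.setViewID("view-uuid");                 // DEFINED -> DEFINE_VIEW
    token.defineViewID(/*block_number*/ 0);       // DEFINE_VIEW -> DEFINED
    String dedup_token = token.getToken();        // throws LOGICAL_ERROR unless the stage is DEFINED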
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,7 +304,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); } @@ -371,10 +371,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } } @@ -421,9 +421,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(zookeeper); From 8efa045a97517bbcf28b80c178e9df84d92973b2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:09:55 +0200 Subject: [PATCH 170/439] fix resolving conflicts with squashing --- src/Interpreters/Squashing.cpp | 86 ++++++++++--------- src/Interpreters/Squashing.h | 29 ++++--- .../Transforms/ApplySquashingTransform.h | 16 +--- .../Transforms/PlanSquashingTransform.cpp | 21 ++--- .../Transforms/PlanSquashingTransform.h | 2 +- .../Transforms/SquashingTransform.cpp | 22 ++--- .../Transforms/SquashingTransform.h | 1 - src/Server/TCPHandler.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 9 files changed, 85 insertions(+), 96 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index bf363a21400..dbf16452287 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,22 +10,24 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(header_) { } Chunk Squashing::flush() { - decltype(chunks_to_merge_vec) to_convert; - to_convert.swap(chunks_to_merge_vec); - return convertToChunk(std::move(to_convert)); + if (!accumulated) + return {}; + + return convertToChunk(accumulated.extract()); } Chunk Squashing::squash(Chunk && input_chunk) { - if (input_chunk.getChunkInfos().empty()) + if (!input_chunk) return Chunk(); auto squash_info = input_chunk.getChunkInfos().extract(); @@ -42,48 +44,39 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + if (isEnoughSize(input_chunk)) { /// If no accumulated data, return just read block. - if (chunks_to_merge_vec.empty()) + if (!accumulated) { - chunks_to_merge_vec.push_back(std::move(input_chunk)); - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - return res_chunk; + accumulated.add(std::move(input_chunk)); + return convertToChunk(accumulated.extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. 
- Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + accumulated.add(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - changeCurrentSize(0, 0); - chunks_to_merge_vec.clear(); - return res_chunk; + return convertToChunk(accumulated.extract()); } + return {}; } @@ -95,7 +88,8 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - auto aggr_chunk = Chunk(); + // It is imortant that chunk is not empty, it has to have colums even if they are emty + auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); return aggr_chunk; @@ -136,22 +130,34 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl return accumulated_chunk; } -void Squashing::expandCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows += rows; - accumulated_size.bytes += bytes; -} - -void Squashing::changeCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows = rows; - accumulated_size.bytes = bytes; -} - bool Squashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +bool Squashing::isEnoughSize() const +{ + return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); +}; + +bool Squashing::isEnoughSize(const Chunk & chunk) const +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + +void Squashing::CurrentSize::add(Chunk && chunk) +{ + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + chunks.push_back(std::move(chunk)); +} + +std::vector Squashing::CurrentSize::extract() +{ + auto result = std::move(chunks); + *this = {}; + return result; +} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 08535119241..830d621b43b 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -38,36 +38,39 @@ public: class Squashing { public: - explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); static Chunk squash(Chunk && input_chunk); Chunk flush(); - bool isDataLeft() - { 
- return !chunks_to_merge_vec.empty(); - } - private: - struct CurrentSize + class CurrentSize { + std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; + + public: + explicit operator bool () const { return !chunks.empty(); } + size_t getRows() const { return rows; } + size_t getBytes() const { return bytes; } + void add(Chunk && chunk); + std::vector extract(); }; - std::vector chunks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; + const size_t min_block_size_rows; + const size_t min_block_size_bytes; + const Block header; - CurrentSize accumulated_size; + CurrentSize accumulated; static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); - void expandCurrentSize(size_t rows, size_t bytes); - void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize() const; bool isEnoughSize(size_t rows, size_t bytes) const; + bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 51bc69f6b9b..94b890198d4 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -27,18 +27,12 @@ public: } ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } } protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) - cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + cur_chunk = DB::Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override @@ -48,16 +42,10 @@ protected: res.is_done = true; return res; } - void onFinish() override - { - auto chunk = DB::Squashing::squash({}); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); - } private: Squashing squashing; Chunk cur_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index d1d3fcd3205..6a8cd10027e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,4 +1,6 @@ #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include namespace DB @@ -10,22 +12,22 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header) - , squashing(min_block_size_rows, min_block_size_bytes) + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header_, header_) + , squashing(header_, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - Chunk result = squashing.add(std::move(chunk)); - if (!result.getChunkInfos().empty()) - squashed_chunk = std::move(result); + LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); + + squashed_chunk = squashing.add(std::move(chunk)); } 
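The contract between the planning and applying sides, which PlanSquashingTransform and ApplySquashingTransform implement above, can be sketched as a simplified driver loop (header, the size thresholds, input_chunks and output() are assumed to exist in the surrounding code; in the real pipeline the two halves run as separate processors, so this only illustrates the Squashing API):

    DB::Squashing squashing(header, min_block_size_rows, min_block_size_bytes);

    for (auto & chunk : input_chunks)
    {
        /// add() either buffers the chunk and returns an empty Chunk, or returns a
        /// header-shaped "planned" chunk whose attached chunk info carries everything buffered so far.
        DB::Chunk planned = squashing.add(std::move(chunk));
        if (planned)
            output(DB::Squashing::squash(std::move(planned)));  /// concatenates the buffered columns
    }

    /// flush() hands back whatever is still buffered; it is squashed the same way.
    DB::Chunk rest = squashing.flush();
    if (rest)
        output(DB::Squashing::squash(std::move(rest)));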
Chunk PlanSquashingTransform::generate() { - if (squashed_chunk.getChunkInfos().empty()) + if (!squashed_chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -35,12 +37,11 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return !squashed_chunk.getChunkInfos().empty(); + return bool(squashed_chunk); } Chunk PlanSquashingTransform::getRemaining() { - Chunk current_chunk = squashing.flush(); - return current_chunk; + return squashing.flush(); } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 4ad2ec2d089..1f83e62284d 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1e3798e89c8..e457a262681 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,15 +12,13 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); + cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -33,10 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = squashing.flush(); - if (!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + finish_chunk = DB::Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -49,6 +44,7 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); + if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -59,7 +55,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -67,18 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); + chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = squashing.flush(); - if 
(!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); + chunk = DB::Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 9d1591d0bcd..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -26,7 +26,6 @@ protected: private: Squashing squashing; Chunk cur_chunk; - Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index bc1487acefa..22d2c4eeebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -884,7 +884,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); Block header = state.input_header; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a8334f22272..0beeca0d542 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. - projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; From bc31f851273e719658ef70371f6b684f4e1c0e69 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:29:11 +0200 Subject: [PATCH 171/439] fix style --- src/Interpreters/Squashing.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index dbf16452287..971f0102148 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -88,7 +88,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have colums even if they are emty + // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a3c5a7ed3ed..6ce6f5e454e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolums in unknown required columns. + /// Check for dynamic subcolumns in unknown required columns. 
if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) From d485606e9420eec3e617e5ec49a1c1ac16478a85 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 02:09:45 +0200 Subject: [PATCH 172/439] fix header in async insert and projections --- src/Interpreters/Squashing.cpp | 22 ++++++++++++---------- src/Interpreters/Squashing.h | 5 ++++- src/Server/TCPHandler.cpp | 22 ++++++++-------------- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++----------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 971f0102148..2b808e25fbb 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "base/defines.h" namespace DB @@ -22,7 +23,9 @@ Chunk Squashing::flush() if (!accumulated) return {}; - return convertToChunk(accumulated.extract()); + auto result = convertToChunk(accumulated.extract()); + chassert(result); + return result; } Chunk Squashing::squash(Chunk && input_chunk) @@ -73,9 +76,7 @@ Chunk Squashing::add(Chunk && input_chunk) /// If accumulated data is big enough, we send it if (isEnoughSize()) - { return convertToChunk(accumulated.extract()); - } return {}; } @@ -91,7 +92,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); - + chassert(aggr_chunk); return aggr_chunk; } @@ -118,16 +119,17 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } - Chunk accumulated_chunk; - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - accumulated_chunk.setChunkInfos(infos); - accumulated_chunk.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); - return accumulated_chunk; + Chunk result; + result.setColumns(std::move(mutable_columns), rows); + result.setChunkInfos(infos); + result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); + + chassert(result); + return result; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 830d621b43b..64a9768a71f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -45,6 +45,9 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); + void setHeader(Block header_) { header = std::move(header_); } + const Block & getHeader() const { return header; } + private: class CurrentSize { @@ -62,7 +65,7 @@ private: const size_t min_block_size_rows; const size_t min_block_size_bytes; - const Block header; + Block header; CurrentSize accumulated; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 22d2c4eeebc..fd226db5bb1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -886,16 +886,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro startInsertQuery(); Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); - Block header = state.input_header; - while (readDataNext()) { - header = state.block_for_insert.cloneEmpty(); - auto planned_chunk = 
squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if (!planned_chunk.getChunkInfos().empty()) + squashing.setHeader(state.block_for_insert.cloneEmpty()); + auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + if (result_chunk) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -904,16 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - if (planned_chunk.getChunkInfos().empty()) + Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + if (!result_chunk) { - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(header), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); } - Chunk result_chunk; - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0beeca0d542..5da36b6ee3b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1315,14 +1315,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - projection_header = block_to_squash.cloneEmpty(); + projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - if (!planned_chunk.getChunkInfos().empty()) + Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1343,12 +1341,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto planned_chunk = projection_squash_plan.flush(); - if (!planned_chunk.getChunkInfos().empty()) + auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, 
projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From c8bca3135de71d4adafe74c415adfc14683ad7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20Dug=C3=A9=20de=20Bernonville?= Date: Wed, 26 Jun 2024 14:51:21 +0200 Subject: [PATCH 173/439] fix odbc and nullable fields --- programs/odbc-bridge/ODBCSource.cpp | 13 ++++- .../integration/test_odbc_interaction/test.py | 55 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 06cbe70f7c6..0d0d7a0afb1 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -40,6 +40,16 @@ create_table_sql_template = """ PRIMARY KEY (`id`)) ENGINE=InnoDB; """ +create_table_sql_nullable_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` integer not null, + `col1` integer, + `col2` decimal(15,10), + `col3` varchar(32), + `col4` datetime + ) + """ + def skip_test_msan(instance): if instance.is_built_with_memory_sanitizer(): @@ -77,6 +87,11 @@ def create_mysql_db(conn, name): cursor.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) +def create_mysql_nullable_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_sql_nullable_template.format(table_name)) + + def create_mysql_table(conn, table_name): with conn.cursor() as cursor: cursor.execute(create_table_sql_template.format(table_name)) @@ -192,6 +207,46 @@ def started_cluster(): cluster.shutdown() +def test_mysql_odbc_select_nullable(started_cluster): + skip_test_msan(node1) + mysql_setup = node1.odbc_drivers["MySQL"] + + table_name = "test_insert_nullable_select" + conn = get_mysql_conn() + create_mysql_nullable_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(1, 1, 1.23456, 'data1', '2010-01-01 00:00:00');".format( + table_name + ) + ) + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(2, NULL, NULL, NULL, NULL);".format( + table_name + ) + ) + conn.commit() + + node1.query( + """ + CREATE TABLE {}(id UInt32, col1 Nullable(UInt32), col2 Nullable(Decimal(15, 10)), col3 Nullable(String), col4 Nullable(DateTime)) ENGINE = ODBC('DSN={}', 'clickhouse', '{}'); + """.format( + table_name, mysql_setup["DSN"], table_name + ) + ) + + assert ( + node1.query( + "SELECT id, col1, col2, col3, col4 from {} order by id asc".format( + table_name + ) + ) + == 
"1\t1\t1.23456\tdata1\t2010-01-01 00:00:00\n2\t\\N\t\\N\t\\N\t\\N\n" + ) + drop_mysql_table(conn, table_name) + conn.close() + + def test_mysql_simple_select_works(started_cluster): skip_test_msan(node1) From 0bf26dbeac3e0cb6b521bf8ec9181127594c2161 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 16 May 2024 14:54:50 +0000 Subject: [PATCH 174/439] Forbid POPULATE with Replicated databases --- src/Interpreters/InterpreterCreateQuery.cpp | 6 +++--- ...33_replicated_database_forbid_create_as_select.reference | 1 + .../02933_replicated_database_forbid_create_as_select.sh | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7272e10b801..7188bd166f4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && create.select) + if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1315,11 +1315,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) is_storage_replicated = true; } - const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. 
Use separate CREATE and INSERT queries"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 8a6904b6bd7..df060ee2612 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -11,6 +11,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden ${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 6dc90798c2bce90fd5a2fbf73d69575e2f4bd693 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 12:43:59 +0000 Subject: [PATCH 175/439] add setting database_replicated_allow_heavy_create --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 6 ++++-- ...933_replicated_database_forbid_create_as_select.sh | 11 +++++++++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 41878142bdc..13751b3d1a2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -732,6 +732,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. 
Note that it can block DDL queue for a long time.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index fba6386b9bd..ee013907353 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"database_replicated_allow_heavy_create", true, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine."}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7188bd166f4..4e4598a2574 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; + if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1319,7 +1320,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index df060ee2612..831963cca8d 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -9,10 +9,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" + # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" + # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" + +# But it is allowed with the special setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 5ddb9b11f487b3f13cb6d7e1d69e22779b6a745f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 27 Jun 2024 14:33:37 +0000 Subject: [PATCH 176/439] remove unwanted changes --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 0ee0160a969..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , 
connection_params(connection_params_) , blob_path(blob_path_) { @@ -128,7 +128,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , connection_params(connection_params_) , blob_path(blob_path_) { From 8d0834eadeeea4b2cd36ba8ff50bdac2d7cd3b35 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 14:52:25 +0000 Subject: [PATCH 177/439] fix --- .../02933_replicated_database_forbid_create_as_select.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 831963cca8d..15f169d880f 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -15,11 +15,11 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" # But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 
+${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 0220a3cac74ad0e96244c68a00a674a41dfb47c4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 17:38:15 +0200 Subject: [PATCH 178/439] fix tests --- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 10c21249ebc..bcb8ee94f7a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void TokenInfo::addChunkHash(String part) void TokenInfo::defineSourceWithChunkHashes() { + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); From 9fa5764c9e330a0c7b21427b5e1972b55951d850 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:14 +0200 Subject: [PATCH 179/439] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 416d4bb5f62..c3944b8dd1d 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -72,7 +72,7 @@ namespace DeduplicationToken * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken * - * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. * * transition // method * DEFINED -> DEFINE_VIEW // setViewID From 1c12c95b79d24e4fba9362d140910ac6a4d16f35 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:24 +0200 Subject: [PATCH 180/439] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index c3944b8dd1d..9d087536a38 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -59,7 +59,7 @@ namespace DeduplicationToken size_t getTotalSize() const; /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the foloving order. 
+ * BuildingStage ensures that token is expanded according the following order. * Firstly token is expanded with information about the source. * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. * From cb3d0ed2757fc6de98fdb0bad1e74f83facd7c88 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:20:35 +0200 Subject: [PATCH 181/439] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..f9f627863dd 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,6 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = true; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From 7d4293f6f8bcd7fcba45b703a83bf44d103395c9 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 28 Jun 2024 00:21:19 +0000 Subject: [PATCH 182/439] Make commands more profound --- programs/disks/CommandCopy.cpp | 109 ++++++++++++++++++++------------- programs/disks/CommandMove.cpp | 81 +++++++++++++----------- 2 files changed, 110 insertions(+), 80 deletions(-) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..0938e88a7f5 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,68 +1,89 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... 
"; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive", "recursively copy the directory"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) + if (!disk_from.getDisk()->exists(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if 
(disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; -} -std::unique_ptr makeCommandCopy() +CommandPtr makeCommandCopy() { return std::make_unique(); } +} diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..6080fcf6811 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,14 +1,9 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandMove final : public ICommand { public: @@ -16,44 +11,58 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... "; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) + auto disk = client.getCurrentDiskWithPath(); + + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + if (disk.getDisk()->isFile(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + disk.getDisk()->moveFile(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); } - - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if 
(disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); - else - disk->moveDirectory(relative_path_from, relative_path_to); } }; -} -std::unique_ptr makeCommandMove() +CommandPtr makeCommandMove() { return std::make_unique(); } + +} From b16451ad8946fdeb93ca259af083467853b6ac22 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:28:07 +0200 Subject: [PATCH 183/439] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index f9f627863dd..ec1559b71a4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,7 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = true; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From 146a7e13d950cb6b122fe23d336092cbe009b7d3 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 27 Jun 2024 08:44:46 +0000 Subject: [PATCH 184/439] Throw exception in bitTest when position is out of bound This happens whenever the number of bit positions is bigger than the number of bits in the number, or when the bit position is negative. --- src/Functions/bitTest.cpp | 14 +++++++++-- .../00967_ubsan_bit_test.reference | 1 - .../0_stateless/00967_ubsan_bit_test.sql | 1 - .../01082_bit_test_out_of_bound.reference | 24 +++++++++++++++++++ .../01082_bit_test_out_of_bound.sql | 5 ++++ .../01710_minmax_count_projection.sql | 2 +- 6 files changed, 42 insertions(+), 5 deletions(-) delete mode 100644 tests/queries/0_stateless/00967_ubsan_bit_test.reference delete mode 100644 tests/queries/0_stateless/00967_ubsan_bit_test.sql diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 78ec9c8b773..f4c90a0c603 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int PARAMETER_OUT_OF_BOUND; } namespace @@ -21,12 +22,21 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else - return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1; + { + const auto max_position = decltype(b)((8 * sizeof(a)) - 1); + if (b > max_position || b < 0) + { + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument needs to a positive value and less or equal to {} for integer {}", + static_cast(max_position), static_cast(a)); + } + return (a >> b) & 1; + } } #if USE_EMBEDDED_COMPILER diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.reference b/tests/queries/0_stateless/00967_ubsan_bit_test.reference deleted file mode 100644 index 573541ac970..00000000000 --- 
a/tests/queries/0_stateless/00967_ubsan_bit_test.reference +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.sql b/tests/queries/0_stateless/00967_ubsan_bit_test.sql deleted file mode 100644 index 1682e725670..00000000000 --- a/tests/queries/0_stateless/00967_ubsan_bit_test.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT sum(ignore(bitTest(number, 65))) FROM numbers(10); diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index 708c5d9d994..ee35e683ed1 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -198,3 +198,27 @@ 97 1 98 1 99 1 +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +8 1 +9 0 +10 1 +11 0 +12 1 +13 0 +14 1 +15 0 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 82e2c5a2380..324768b2e1d 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,2 +1,7 @@ SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); + +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16); +SELECT -number, bitTest(toUInt16(1), -number) FROM numbers(8); -- { serverError PARAMETER_OUT_OF_BOUND } diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index d0177da84d2..6c598bce440 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -16,7 +16,7 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa select min(i) from d where 1 = _partition_value.1; -- fuzz crash https://github.com/ClickHouse/ClickHouse/issues/37151 -SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 65535), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, -9223372036854775808), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; +SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 0), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, 0), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; drop table d; From d63f70e4f1459bbb7808fd1fb7202e9648ad6570 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 27 Jun 2024 10:41:51 +0000 Subject: [PATCH 185/439] Add bound check to bitTestAll and bitTestAny --- src/Functions/FunctionBitTestMany.h | 13 +- src/Functions/bitTest.cpp | 8 +- .../01082_bit_test_out_of_bound.reference | 184 ------------------ .../01082_bit_test_out_of_bound.sql | 6 +- 4 files changed, 20 insertions(+), 191 deletions(-) diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 71e94b1e71d..19ece2ae9e5 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -17,6 +17,7 @@ 
namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int PARAMETER_OUT_OF_BOUND; } @@ -146,6 +147,9 @@ private: const auto pos = pos_col_const->getUInt(0); if (pos < 8 * sizeof(ValueType)) mask = mask | (ValueType(1) << pos); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); } else { @@ -186,13 +190,20 @@ private: for (const auto i : collections::range(0, mask.size())) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos[i])); return true; } else if (const auto pos_col_const = checkAndGetColumnConst>(pos_col_untyped)) { const auto & pos = pos_col_const->template getValue(); - const auto new_mask = pos < 8 * sizeof(ValueType) ? ValueType(1) << pos : 0; + if (pos >= 8 * sizeof(ValueType)) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); + + const auto new_mask = ValueType(1) << pos; for (const auto i : collections::range(0, mask.size())) mask[i] = mask[i] | new_mask; diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index f4c90a0c603..1223ef7cbbb 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -28,14 +28,14 @@ struct BitTestImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else { + typename NumberTraits::ToInteger::Type a_int(a); + typename NumberTraits::ToInteger::Type b_int(b); const auto max_position = decltype(b)((8 * sizeof(a)) - 1); if (b > max_position || b < 0) - { throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "The bit position argument needs to a positive value and less or equal to {} for integer {}", - static_cast(max_position), static_cast(a)); - } - return (a >> b) & 1; + std::to_string(max_position), std::to_string(a_int)); + return (a_int >> b_int) & 1; } } diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index ee35e683ed1..cf12c6b0b1c 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -6,98 +6,6 @@ 5 0 6 1 7 0 -8 0 -9 0 -10 0 -11 0 -12 0 -13 0 -14 0 -15 0 -16 0 -17 0 -18 0 -19 0 -20 0 -21 0 -22 0 -23 0 -24 0 -25 0 -26 0 -27 0 -28 0 -29 0 -30 0 -31 0 -32 0 -33 0 -34 0 -35 0 -36 0 -37 0 -38 0 -39 0 -40 0 -41 0 -42 0 -43 0 -44 0 -45 0 -46 0 -47 0 -48 0 -49 0 -50 0 -51 0 -52 0 -53 0 -54 0 -55 0 -56 0 -57 0 -58 0 -59 0 -60 0 -61 0 -62 0 -63 0 -64 0 -65 0 -66 0 -67 0 -68 0 -69 0 -70 0 -71 0 -72 0 -73 0 -74 0 -75 0 -76 0 -77 0 -78 0 -79 0 -80 0 -81 0 -82 0 -83 0 -84 0 -85 0 -86 0 -87 0 -88 0 -89 0 -90 0 -91 0 -92 0 -93 0 -94 0 -95 0 -96 0 -97 0 -98 0 -99 0 0 1 1 0 2 1 @@ -106,98 +14,6 @@ 5 0 6 1 7 0 -8 1 -9 1 -10 1 -11 1 -12 1 -13 1 -14 1 -15 1 -16 1 -17 1 -18 1 -19 1 -20 1 -21 1 -22 1 -23 1 -24 1 -25 1 -26 1 -27 1 -28 1 -29 1 -30 1 -31 1 -32 1 -33 1 -34 1 -35 1 -36 1 -37 1 -38 1 -39 1 -40 1 -41 1 -42 1 -43 1 -44 1 -45 1 -46 1 -47 1 -48 1 -49 1 -50 1 -51 1 -52 1 -53 1 -54 1 -55 1 -56 1 -57 1 -58 1 -59 1 -60 1 -61 1 -62 1 -63 1 -64 1 -65 1 -66 1 -67 1 -68 1 -69 1 -70 1 -71 1 -72 1 -73 1 -74 1 -75 1 -76 1 -77 1 -78 1 -79 1 -80 1 -81 1 
-82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -88 1 -89 1 -90 1 -91 1 -92 1 -93 1 -94 1 -95 1 -96 1 -97 1 -98 1 -99 1 0 1 1 0 2 1 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 324768b2e1d..92ece2a4aa4 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,5 +1,7 @@ -SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); -SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } From ce330d54cf9e75defa98fa8fa6e16b17252441ad Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 28 Jun 2024 13:38:32 +0200 Subject: [PATCH 186/439] Review fixes (partial) --- .../DataLakes/DeltaLakeMetadata.cpp | 41 +++++++++++-------- .../DataLakes/IStorageDataLake.h | 19 ++++++--- .../StorageObjectStorageSource.cpp | 1 - 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index bd3e21f12fd..79cd48e7aab 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -168,14 +168,20 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ + + /// Read metadata file and fill `file_schema`, `file_parition_columns`, `result`. + /// `result` is a list of data files. + /// `file_schema` is a common schema for all files. + /// Schema evolution is not supported, so we check that all files have the same schema. + /// `file_partiion_columns` is information about parition columns of data files. 
void processMetadataFile( - const String & key, + const String & metadata_file_path, NamesAndTypesList & file_schema, DataLakePartitionColumns & file_partition_columns, std::set & result) { auto read_settings = context->getReadSettings(); - auto buf = object_storage->readObject(StoredObject(key), read_settings); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); char c; while (!buf->eof()) @@ -197,9 +203,9 @@ struct DeltaLakeMetadata::Impl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - object->stringify(oss); - LOG_TEST(log, "Metadata: {}", oss.str()); + // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + // object->stringify(oss); + // LOG_TEST(log, "Metadata: {}", oss.str()); if (object->has("add")) { @@ -211,21 +217,24 @@ struct DeltaLakeMetadata::Impl auto it = file_partition_columns.find(filename); if (it == file_partition_columns.end()) { - auto partition_values = add_object->get("partitionValues").extract(); - if (partition_values->size()) + if (add_object->has("partitionValues")) { - auto & current_partition_columns = file_partition_columns[filename]; - for (const auto & partition_name : partition_values->getNames()) + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) { - const auto value = partition_values->getValue(partition_name); - auto name_and_type = file_schema.tryGetByName(partition_name); - if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); - auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); - LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } } } } diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 97fb9890490..ab069364021 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -99,14 +99,16 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - auto partition_columns = new_metadata->getPartitionColumns(); - - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; - if (current_metadata && *current_metadata == *new_metadata) return; + if (!current_metadata) + { + auto partition_columns = new_metadata->getPartitionColumns(); + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; + } + current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); 
updated_configuration->setPaths(current_metadata->getDataFiles()); @@ -127,6 +129,13 @@ public: { base_configuration->format = Storage::configuration->format; } + + if (current_metadata) + { + auto partition_columns = current_metadata->getPartitionColumns(); + if (partition_columns != Storage::partition_columns) + Storage::partition_columns = partition_columns; + } } private: diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6a5957c405b..b99ac466081 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -229,7 +229,6 @@ Chunk StorageObjectStorageSource::generate() } } - return chunk; } From fa7bad4993dee91009cc275fb49755aee0bf849d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 28 Jun 2024 13:51:42 +0200 Subject: [PATCH 187/439] Decrease sampling rate slightly --- src/Common/GWPAsan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index ea376609ff4..f4a916a696b 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 5000; + opts.SampleRate = 6000; const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") From f25147aefd7cef1ed18696df57a60369120229f8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 28 Jun 2024 14:57:38 +0200 Subject: [PATCH 188/439] Review fixes --- .../ObjectStorage/DataLakes/IStorageDataLake.h | 14 ++++---------- .../ObjectStorage/StorageObjectStorage.cpp | 11 ++++------- src/Storages/ObjectStorage/StorageObjectStorage.h | 5 ++++- .../ObjectStorage/StorageObjectStorageSource.cpp | 5 ++--- .../ObjectStorage/StorageObjectStorageSource.h | 4 +--- 5 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index ab069364021..f1217bc9729 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -102,16 +102,10 @@ public: if (current_metadata && *current_metadata == *new_metadata) return; - if (!current_metadata) - { - auto partition_columns = new_metadata->getPartitionColumns(); - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; - } - current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); updated_configuration->setPaths(current_metadata->getDataFiles()); + updated_configuration->setPartitionColumns(current_metadata->getPartitionColumns()); Storage::configuration = updated_configuration; } @@ -132,9 +126,9 @@ public: if (current_metadata) { - auto partition_columns = current_metadata->getPartitionColumns(); - if (partition_columns != Storage::partition_columns) - Storage::partition_columns = partition_columns; + const auto & columns = current_metadata->getPartitionColumns(); + base_configuration->setPartitionColumns(columns); + Storage::configuration->setPartitionColumns(columns); } } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 
4e3eeeb17f2..683473006e3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -111,8 +111,7 @@ public: const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - const DataLakePartitionColumns & partition_columns_) + size_t num_streams_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) @@ -124,7 +123,6 @@ public: , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) - , partition_columns(partition_columns_) { } @@ -163,7 +161,7 @@ public: { auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count, partition_columns); + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -192,7 +190,6 @@ private: const size_t max_block_size; size_t num_streams; const bool distributed_processing; - DataLakePartitionColumns partition_columns; void createIterator(const ActionsDAG::Node * predicate) { @@ -252,8 +249,7 @@ void StorageObjectStorage::read( need_only_count, local_context, max_block_size, - num_streams, - partition_columns); + num_streams); query_plan.addStep(std::move(read_step)); } @@ -464,6 +460,7 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) format = other.format; compression_method = other.compression_method; structure = other.structure; + partition_columns = other.partition_columns; } bool StorageObjectStorage::Configuration::withPartitionWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index fba91edf6f7..c93a0bf6943 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -136,7 +136,6 @@ protected: const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; - mutable DataLakePartitionColumns partition_columns; LoggerPtr log; }; @@ -196,6 +195,9 @@ public: virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } + void setPartitionColumns(const DataLakePartitionColumns & columns) { partition_columns = columns; } + const DataLakePartitionColumns & getPartitionColumns() const { return partition_columns; } + String format = "auto"; String compression_method = "auto"; String structure = "auto"; @@ -207,6 +209,7 @@ protected: void assertInitialized() const; bool initialized = false; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b99ac466081..6bded90f11d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -49,8 +49,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_, - const DataLakePartitionColumns & partition_columns_) + bool need_only_count_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ 
-69,7 +68,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) - , partition_columns(partition_columns_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -206,6 +204,7 @@ Chunk StorageObjectStorageSource::generate() .last_modified = object_info->metadata->last_modified }); + const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) { auto partition_values = partition_columns.find(filename); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 566cc687400..c16619b34d8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -40,8 +40,7 @@ public: UInt64 max_block_size_, std::shared_ptr file_iterator_, size_t max_parsing_threads_, - bool need_only_count_, - const DataLakePartitionColumns & partition_columns_ = {}); + bool need_only_count_); ~StorageObjectStorageSource() override; @@ -83,7 +82,6 @@ protected: bool initialized = false; size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); - DataLakePartitionColumns partition_columns; struct ReaderHolder : private boost::noncopyable { From fbdbbf5a782a5fb032dacf880ade1ebc505e8dea Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:17:35 +0200 Subject: [PATCH 189/439] Fix typo --- src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 79cd48e7aab..3b6cbca5d46 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -173,7 +173,7 @@ struct DeltaLakeMetadata::Impl /// `result` is a list of data files. /// `file_schema` is a common schema for all files. /// Schema evolution is not supported, so we check that all files have the same schema. - /// `file_partiion_columns` is information about parition columns of data files. + /// `file_partiion_columns` is information about partition columns of data files. 
void processMetadataFile( const String & metadata_file_path, NamesAndTypesList & file_schema, From 31c65a40926d3d5209898f5efb5c1cf33b602133 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 14:21:14 +0000 Subject: [PATCH 190/439] add settings to change history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4ac25a649b7..1c5ad9d0875 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,9 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, + {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, + {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, From 898dd8bb8efd260733c8b967868e4fcb88fb145e Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 28 Jun 2024 15:15:19 +0000 Subject: [PATCH 191/439] Throw exception in bitShift for negative shift positions --- src/Functions/bitShiftLeft.cpp | 6 ++++++ src/Functions/bitShiftRight.cpp | 7 +++++++ ...ift_throws_error_for_negative_shift_positions.reference | 0 ...bit_shift_throws_error_for_negative_shift_positions.sql | 7 +++++++ 4 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c366a1ecb44..c3f5de628aa 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -24,6 +24,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -35,6 +37,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; @@ -100,6 +104,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 1c37cd3bf4c..b53485c45f5 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -8,6 +8,7 @@ namespace 
ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -25,6 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -51,6 +54,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; @@ -88,6 +93,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql new file mode 100644 index 00000000000..659d03d1951 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql @@ -0,0 +1,7 @@ +SELECT bitShiftRight(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftRight('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftRight(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT bitShiftLeft(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftLeft('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftLeft(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } \ No newline at end of file From 77c8f034597639439e7b9c09ea89207c73cd398e Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 28 Jun 2024 17:26:03 +0000 Subject: [PATCH 192/439] Fix coding style --- utils/check-style/check-style | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 380656cd1ca..31972894c3d 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -94,6 +94,7 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values[i] ErrorCodes::getErrorCodeByName ErrorCodes::Value + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} From d4b71ea4cbacb614b35ac6cd3fd07a0c299e3415 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 23:09:08 +0000 Subject: [PATCH 193/439] fix settings changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index c8ddf23ba08..a4883e3f209 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 
@@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -153,7 +154,6 @@ static const std::map Date: Sat, 29 Jun 2024 01:35:59 +0200 Subject: [PATCH 194/439] adjust logging --- src/Interpreters/Squashing.cpp | 2 +- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 2 +- src/Processors/Transforms/PlanSquashingTransform.cpp | 4 ---- src/Processors/Transforms/PlanSquashingTransform.h | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 2b808e25fbb..25434d1103e 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "base/defines.h" +#include namespace DB diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index bcb8ee94f7a..374a6495f79 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -155,7 +155,7 @@ void CheckTokenTransform::transform(Chunk & chunk) return; } - LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); + LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 6a8cd10027e..ee4dfa6a64e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,6 +1,4 @@ #include -#include "Common/Logger.h" -#include "Common/logger_useful.h" #include namespace DB @@ -20,8 +18,6 @@ PlanSquashingTransform::PlanSquashingTransform( void PlanSquashingTransform::consume(Chunk chunk) { - LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); - squashed_chunk = squashing.add(std::move(chunk)); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 1f83e62284d..e6db245499e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -23,7 +23,6 @@ protected: private: Squashing squashing; Chunk squashed_chunk; - Chunk finish_chunk; }; } From 004d913c565cc0646222601d6a98789b77d92938 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sun, 30 Jun 2024 02:14:28 +0000 Subject: [PATCH 195/439] change option to enum and add test --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 ++++ src/Core/SettingsEnums.h | 8 ++++++++ src/Interpreters/InterpreterDeleteQuery.cpp | 6 +++--- .../0_stateless/03161_lightweight_delete_projection.sql | 7 +++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 513cdf9f9a2..574017a6953 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -612,7 +612,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries 
(mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..9dfff3c56ca 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -173,6 +173,10 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) +IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS, + {{"throw", LightweightMutationProjectionMode::THROW}, + {"drop", LightweightMutationProjectionMode::DROP}}) + IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_ENUM(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..8456c4b688c 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -339,6 +339,14 @@ enum class ParallelReplicasCustomKeyFilterType : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) +enum class LightweightMutationProjectionMode : uint8_t +{ + THROW, + DROP, +}; + +DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) + DECLARE_SETTING_ENUM(LocalFSReadMethod) enum class S3QueueMode : uint8_t diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 23bbd18ff51..39d5d9e9cef 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -116,15 +116,15 @@ BlockIO InterpreterDeleteQuery::execute() if (table->hasProjection()) { auto context = Context::createCopy(getContext()); - auto mode = Field(context->getSettingsRef().lightweight_mutation_projection_mode); - if (mode == "throw") + auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; + if (mode == LightweightMutationProjectionMode::THROW) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DELETE query is not supported for table {} as it has projections. 
" "User should drop all the projections manually before running the query", table->getStorageID().getFullTableName()); } - else if (mode == "drop") + else if (mode == LightweightMutationProjectionMode::DROP) { std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 786f6a3cc34..70a069df1bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -19,6 +19,13 @@ DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users'); + SELECT * FROM users; DROP TABLE users; From 15d9ad65c65a476e8573ec37aca25050a8a8f7a4 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 09:01:32 +0200 Subject: [PATCH 196/439] Reduce even more --- src/Common/GWPAsan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index f4a916a696b..0482ddb4e2b 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 6000; + opts.SampleRate = 8000; const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") From 98293b16249b21b4a69da49524c1dffce3fc5fb2 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Mon, 1 Jul 2024 07:31:57 +0000 Subject: [PATCH 197/439] Max sessions for user tests improvements --- .../test.py | 6 ++-- .../02832_alter_max_sessions_for_user.sh | 36 ++++++++++++------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 133991fed7a..a2fa77e8dc9 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -7,7 +7,7 @@ import pytest import sys import threading -from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_logs_contain_with_retry from helpers.uclient import client, prompt @@ -51,7 +51,7 @@ instance = cluster.add_instance( def get_query(name, id): - return f"SElECT '{name}', {id}, number from system.numbers" + return f"SELECT '{name}', {id}, COUNT(*) from system.numbers" def grpc_get_url(): @@ -90,7 +90,7 @@ def threaded_run_test(sessions): if len(sessions) > MAX_SESSIONS_FOR_USER: # High retry amount to avoid flakiness in ASAN (+Analyzer) tests assert_logs_contain_with_retry( - instance, "overflown session count", retry_count=60 + instance, "overflown session count", retry_count=120 ) instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh index a3b0d17f1be..87fbffdb1e6 100755 --- 
a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" USER2="02832_alter_max_sessions_user_two_$$" @@ -15,6 +17,26 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" +function run_sessions_set() +{ + local sessions_count="$1" + local session_check="$2" + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + local query_id="${QUERY_ID_PREFIX}_${i}" + # Write only expected error text + # More than alter_sessions_count queries will not start. + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + done + + wait +} + function test_alter_profile() { local max_session_count="$1" @@ -24,23 +46,13 @@ function test_alter_profile() ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" # Create sessions with $max_session_count restriction - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # Skip output from this query - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null - done + run_sessions_set $max_session_count 0 # Update restriction to $alter_sessions_count ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # ignore select 1, we need only errors - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & - done - - wait + run_sessions_set $max_session_count 1 } test_alter_profile 1 1 From 6b47171f2c2a3f3ebaed692f6d30e644c42380db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 10:52:08 +0200 Subject: [PATCH 198/439] Keeper binary with different entrypoint --- docker/packager/packager | 5 +- programs/CMakeLists.txt | 20 +- programs/keeper/CMakeLists.txt | 196 +------------- programs/keeper/Keeper.cpp | 10 - programs/keeper/keeper_main.cpp | 443 ++++++++++++++++++++++++++++++++ 5 files changed, 460 insertions(+), 214 deletions(-) create mode 100644 programs/keeper/keeper_main.cpp diff --git a/docker/packager/packager b/docker/packager/packager index 2dcbd8d695e..da4af7fc1be 100755 --- a/docker/packager/packager +++ 
b/docker/packager/packager @@ -276,10 +276,7 @@ def parse_env_variables( if is_release_build(debug_build, package_type, sanitizer, coverage): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") - if is_cross_arm: - cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") - else: - result.append("BUILD_MUSL_KEEPER=1") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4640882f2be..b06290ae352 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -66,18 +66,18 @@ else() message(STATUS "Library bridge mode: OFF") endif() -if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") -else() - message(STATUS "ClickHouse keeper mode: OFF") -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) message(STATUS "ClickHouse keeper-converter mode: ON") else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse keeper mode: ON") +else() + message(STATUS "ClickHouse keeper mode: OFF") +endif() + if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) message(STATUS "ClickHouse keeper-client mode: ON") else() @@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader) add_subdirectory (su) add_subdirectory (disks) -if (ENABLE_CLICKHOUSE_KEEPER) - add_subdirectory (keeper) -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() @@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) add_subdirectory (keeper-client) endif() +if (ENABLE_CLICKHOUSE_KEEPER) + add_subdirectory (keeper) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 079951be55e..9b931c49c24 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,4 +1,5 @@ set(CLICKHOUSE_KEEPER_SOURCES + keeper_main.cpp Keeper.cpp ) @@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_io clickhouse_common_zookeeper daemon + clickhouse-keeper-converter-lib + clickhouse-keeper-client-lib + clickhouse_functions dbms ) @@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources - set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES}) - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp - ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp - - Keeper.cpp - clickhouse-keeper.cpp - ) - - # List of resources for clickhouse-keeper client - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) - list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp - ) - endif() - - clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) - - # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) - target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim) - target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim) - endif() - - target_link_libraries(clickhouse-keeper - PRIVATE - ch_contrib::abseil_swiss_tables - ch_contrib::nuraft - ch_contrib::lz4 - ch_contrib::zstd - ch_contrib::cityhash - ch_contrib::jemalloc - common ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - pcg_random - ch_contrib::pdqsort - ch_contrib::miniselect - clickhouse_common_config_no_zookeeper_log - loggers_no_text_log - clickhouse_common_io - clickhouse_parsers # 
Otherwise compression will not built. FIXME. - ) + target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK}) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..60834dbe582 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -75,16 +75,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} - -#endif - namespace DB { diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp new file mode 100644 index 00000000000..a5bc5db7be8 --- /dev/null +++ b/programs/keeper/keeper_main.cpp @@ -0,0 +1,443 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include /// pair + +#include + +#include "config.h" +#include "config_tools.h" + +#include +#include +#include + +#include +#include + + +int mainEntryClickHouseKeeper(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif + +namespace +{ + +using MainFunc = int (*)(int, char**); + +/// Add an item here to register new application +std::pair clickhouse_applications[] = +{ + // keeper + {"keeper", mainEntryClickHouseKeeper}, +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"converter", mainEntryClickHouseKeeperConverter}, + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"client", mainEntryClickHouseKeeperClient}, + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif + +}; + +int printHelp(int, char **) +{ + std::cerr << "Use one of the following commands:" << std::endl; + for (auto & application : clickhouse_applications) + std::cerr << "clickhouse " << application.first << " [args] " << std::endl; + return -1; +} + + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. 
+void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). 
+ "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif + + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + +} + +bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == app_suffix + || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix)) + { + argv.erase(first_arg); + return true; + } + } + + return false; +} + +/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. +/// We don't use it. But it can be used by some libraries for implementation of "plugins". 
+/// We absolutely discourage the ancient technique of loading +/// 3rd-party uncontrolled dangerous libraries into the process address space, +/// because it is insane. + +#if !defined(USE_MUSL) +extern "C" +{ + void * dlopen(const char *, int) + { + return nullptr; + } + + void * dlmopen(long, const char *, int) // NOLINT + { + return nullptr; + } + + int dlclose(void *) + { + return 0; + } + + const char * dlerror() + { + return "ClickHouse does not allow dynamic library loading"; + } +} +#endif + +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + +int main(int argc_, char ** argv_) +{ + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + /// Note: we forbid dlopen in our code. + updatePHDRCache(); + +#if !defined(USE_MUSL) + checkHarmfulEnvironmentVariables(argv_); +#endif + + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. 
+ std::set_new_handler(nullptr); + + std::vector argv(argv_, argv_ + argc_); + + /// Print a basic help if nothing was matched + MainFunc main_func = mainEntryClickHouseKeeper; + + if (isClickhouseApp("help", argv)) + { + main_func = printHelp; + } + else + { + for (auto & application : clickhouse_applications) + { + if (isClickhouseApp(application.first, argv)) + { + main_func = application.second; + break; + } + } + } + + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + dumpCoverage(); +#endif + + return exit_code; +} From 4a9daa202d74ba30fc3efd455f6a37a41bb4e4db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 11:24:45 +0200 Subject: [PATCH 199/439] Remove Keeper standalone build --- docker/packager/binary-builder/build.sh | 1 + programs/self-extracting/CMakeLists.txt | 17 +- src/Compression/CompressionFactory.cpp | 4 - src/Coordination/Standalone/Context.cpp | 486 ------------------ src/Coordination/Standalone/Context.h | 178 ------- src/Coordination/Standalone/Settings.cpp | 24 - .../Standalone/ThreadStatusExt.cpp | 19 - src/Core/SettingsFields.cpp | 45 -- src/Core/SettingsFields.h | 14 +- src/Daemon/BaseDaemon.cpp | 5 - src/Daemon/SentryWriter.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Disks/ObjectStorages/DiskObjectStorage.h | 2 - .../DiskObjectStorageMetadata.cpp | 4 - .../ObjectStorages/MetadataStorageFactory.cpp | 6 - .../ObjectStorages/ObjectStorageFactory.cpp | 18 +- .../createMetadataStorageMetrics.h | 12 +- src/Disks/registerDisks.cpp | 15 - src/IO/S3/BlobStorageLogWriter.cpp | 2 - src/Interpreters/Context.h | 8 - src/Server/PrometheusRequestHandler.cpp | 3 - src/Server/ProtocolServerAdapter.cpp | 4 +- src/Server/ProtocolServerAdapter.h | 2 +- 23 files changed, 36 insertions(+), 837 deletions(-) delete mode 100644 src/Coordination/Standalone/Context.cpp delete mode 100644 src/Coordination/Standalone/Context.h delete mode 100644 src/Coordination/Standalone/Settings.cpp delete mode 100644 src/Coordination/Standalone/ThreadStatusExt.cpp diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..bd5f2fe8466 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -111,6 +111,7 @@ fi mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output +[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 4b6dd07f618..32b686d40dd 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -10,9 +10,24 @@ else () set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () -add_custom_target (self-extracting ALL +add_custom_target (self-extracting-server ALL ${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped 
../clickhouse-stripped DEPENDS clickhouse clickhouse-stripped compressor ) + +set(self_extracting_deps "self-extracting-server") + +if (BUILD_STANDALONE_KEEPER) + add_custom_target (self-extracting-keeper ALL + ${CMAKE_COMMAND} -E remove clickhouse-keeper + COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper + DEPENDS compressor clickhouse-keeper + ) + list(APPEND self_extracting_deps "self-extracting-keeper") +endif() + +add_custom_target (self-extracting ALL + DEPENDS ${self_extracting_deps} +) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 68e0131c91b..2e7aa0d086f 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -185,7 +185,6 @@ void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); @@ -193,7 +192,6 @@ void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); void registerCodecGCD(CompressionCodecFactory & factory); -#endif CompressionCodecFactory::CompressionCodecFactory() { @@ -205,7 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() #endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); registerCodecDoubleDelta(*this); @@ -216,7 +213,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp deleted file mode 100644 index 2017adcc58d..00000000000 --- a/src/Coordination/Standalone/Context.cpp +++ /dev/null @@ -1,486 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace ProfileEvents -{ - extern const Event ContextLock; - extern const Event ContextLockWaitMicroseconds; -} - -namespace CurrentMetrics -{ - extern const Metric ContextLockWait; - extern const Metric BackgroundSchedulePoolTask; - extern const Metric BackgroundSchedulePoolSize; - extern const Metric IOWriterThreads; - extern const Metric IOWriterThreadsActive; - extern const Metric IOWriterThreadsScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; -} - -struct ContextSharedPart : boost::noncopyable -{ - ContextSharedPart() - : macros(std::make_unique()) - {} - - ~ContextSharedPart() - { - if (keeper_dispatcher) - { - try - { - keeper_dispatcher->shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - /// Wait for thread pool for background reads and writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - if (asynchronous_remote_fs_reader) - { - try - { - asynchronous_remote_fs_reader->wait(); - asynchronous_remote_fs_reader.reset(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (asynchronous_local_fs_reader) - { - try - { - asynchronous_local_fs_reader->wait(); - asynchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (synchronous_local_fs_reader) - { - try - { - synchronous_local_fs_reader->wait(); - synchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (threadpool_writer) - { - try - { - threadpool_writer->wait(); - threadpool_writer.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - /// For access of most of shared objects. - mutable SharedMutex mutex; - - ServerSettings server_settings; - - String path; /// Path to the data directory, with a slash at the end. - ConfigurationPtr config; /// Global configuration settings. - MultiVersion macros; /// Substitutions extracted from config. - OnceFlag schedule_pool_initialized; - mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - - mutable OnceFlag readers_initialized; - mutable std::unique_ptr asynchronous_remote_fs_reader; - mutable std::unique_ptr asynchronous_local_fs_reader; - mutable std::unique_ptr synchronous_local_fs_reader; - -#if USE_LIBURING - mutable OnceFlag io_uring_reader_initialized; - mutable std::unique_ptr io_uring_reader; -#endif - - mutable OnceFlag threadpool_writer_initialized; - mutable std::unique_ptr threadpool_writer; - - mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads - mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes - - mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads - mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage - - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; - -Context::Context() = default; -Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} -Context::~Context() = default; - -SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; -SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; -SharedContextHolder::SharedContextHolder() = default; -SharedContextHolder::~SharedContextHolder() = default; -SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) - : shared(std::move(shared_context)) {} - -void SharedContextHolder::reset() { shared.reset(); } - -void Context::makeGlobalContext() -{ - initGlobal(); - global_context = shared_from_this(); -} - -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) -{ - auto res = std::shared_ptr(new Context); - res->shared = shared_part; - return res; -} - -void Context::initGlobal() -{ - assert(!global_context_instance); - global_context_instance = shared_from_this(); -} - -SharedContextHolder Context::createShared() -{ - return SharedContextHolder(std::make_unique()); -} - - -ContextMutablePtr Context::getGlobalContext() const -{ - auto ptr = global_context.lock(); - if (!ptr) throw 
Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); - return ptr; -} - -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -String Context::getPath() const -{ - auto lock = getGlobalSharedLock(); - return shared->path; -} - -void Context::setPath(const String & path) -{ - auto lock = getGlobalLock(); - shared->path = path; -} - -MultiVersion::Version Context::getMacros() const -{ - return shared->macros.get(); -} - -void Context::setMacros(std::unique_ptr && macros) -{ - shared->macros.set(std::move(macros)); -} - -BackgroundSchedulePool & Context::getSchedulePool() const -{ - callOnce(shared->schedule_pool_initialized, [&] { - shared->schedule_pool = std::make_unique( - shared->server_settings.background_schedule_pool_size, - CurrentMetrics::BackgroundSchedulePoolTask, - CurrentMetrics::BackgroundSchedulePoolSize, - "BgSchPool"); - }); - - return *shared->schedule_pool; -} - -void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) -{ - shared->remote_host_filter.setValuesFromConfig(config); -} - -const RemoteHostFilter & Context::getRemoteHostFilter() const -{ - return shared->remote_host_filter; -} - -IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const -{ - callOnce(shared->readers_initialized, [&] { - const auto & config = getConfigRef(); - shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config); - shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config); - shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config); - }); - - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - return *shared->asynchronous_remote_fs_reader; - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - return *shared->asynchronous_local_fs_reader; - case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - return *shared->synchronous_local_fs_reader; - } -} - -#if USE_LIBURING -IOUringReader & 
Context::getIOUringReader() const -{ - callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader(); - }); - - return *shared->io_uring_reader; -} -#endif - -std::shared_ptr Context::getFilesystemCacheLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getFilesystemReadPrefetchesLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getBlobStorageLog() const -{ - return nullptr; -} - -void Context::setConfig(const ConfigurationPtr & config) -{ - auto lock = getGlobalLock(); - shared->config = config; -} - -const Poco::Util::AbstractConfiguration & Context::getConfigRef() const -{ - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); -} - -std::shared_ptr Context::getAsyncReadCounters() const -{ - auto lock = getLocalLock(); - if (!async_read_counters) - async_read_counters = std::make_shared(); - return async_read_counters; -} - -ThreadPool & Context::getThreadPoolWriter() const -{ - callOnce(shared->threadpool_writer_initialized, [&] { - const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); - - shared->threadpool_writer = std::make_unique( - CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size); - }); - - return *shared->threadpool_writer; -} - -ThrottlerPtr Context::getRemoteReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getRemoteWriteThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalWriteThrottler() const -{ - return nullptr; -} - -ReadSettings Context::getReadSettings() const -{ - return ReadSettings{}; -} - -ResourceManagerPtr Context::getResourceManager() const -{ - return nullptr; -} - -ClassifierPtr Context::getWorkloadClassifier() const -{ - return nullptr; -} - -void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(shared->keeper_dispatcher_mutex); - - if (shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); - } -} - -std::shared_ptr Context::getKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return shared->keeper_dispatcher; -} - -std::shared_ptr Context::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - return shared->keeper_dispatcher; -} - -void Context::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (shared->keeper_dispatcher) - { - shared->keeper_dispatcher->shutdown(); - shared->keeper_dispatcher.reset(); - } -} - -void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - return; - - shared->keeper_dispatcher->updateConfiguration(config_, getMacros()); 
-} - -std::shared_ptr Context::getZooKeeper() const -{ - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); -} - -const S3SettingsByEndpoint & Context::getStorageS3Settings() const -{ - std::lock_guard lock(shared->mutex); - - if (!shared->storage_s3_settings) - { - const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); - } - - return *shared->storage_s3_settings; -} - -const ServerSettings & Context::getServerSettings() const -{ - return shared->server_settings; -} - -bool Context::hasTraceCollector() const -{ - return false; -} - -bool Context::isBackgroundOperationContext() const -{ - return false; -} - -} diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h deleted file mode 100644 index d3bbfececed..00000000000 --- a/src/Coordination/Standalone/Context.h +++ /dev/null @@ -1,178 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include - -#include "config.h" -namespace zkutil -{ - class ZooKeeper; - using ZooKeeperPtr = std::shared_ptr; -} - -namespace DB -{ - -struct ContextSharedPart; -class Macros; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class BlobStorageLog; -class IOUringReader; -class S3SettingsByEndpoint; - -/// A small class which owns ContextShared. -/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. -struct SharedContextHolder -{ - ~SharedContextHolder(); - SharedContextHolder(); - explicit SharedContextHolder(std::unique_ptr shared_context); - SharedContextHolder(SharedContextHolder &&) noexcept; - - SharedContextHolder & operator=(SharedContextHolder &&) noexcept; - - ContextSharedPart * get() const { return shared.get(); } - void reset(); -private: - std::unique_ptr shared; -}; - -class ContextData -{ -protected: - ContextWeakMutablePtr global_context; - inline static ContextPtr global_context_instance; - ContextSharedPart * shared; - - /// Query metrics for reading data asynchronously with IAsynchronousReader. - mutable std::shared_ptr async_read_counters; - - Settings settings; /// Setting for query execution. - -public: - /// Use copy constructor or createGlobal() instead - ContextData(); - ContextData(const ContextData &); -}; - -class Context : public ContextData, public std::enable_shared_from_this -{ -private: - /// ContextData mutex - mutable SharedMutex mutex; - - Context(); - Context(const Context &); - - std::unique_lock getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - -public: - /// Create initial Context with ContextShared and etc. - static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); - static SharedContextHolder createShared(); - - ContextMutablePtr getGlobalContext() const; - static ContextPtr getGlobalContextInstance() { return global_context_instance; } - - void makeGlobalContext(); - void initGlobal(); - - ~Context(); - - using ConfigurationPtr = Poco::AutoPtr; - - /// Global application configuration settings. 
- void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - - const Settings & getSettingsRef() const { return settings; } - - String getPath() const; - void setPath(const String & path); - - MultiVersion::Version getMacros() const; - void setMacros(std::unique_ptr && macros); - - BackgroundSchedulePool & getSchedulePool() const; - - /// Storage of allowed hosts from config.xml - void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - const RemoteHostFilter & getRemoteHostFilter() const; - - std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getFilesystemReadPrefetchesLog() const; - std::shared_ptr getBlobStorageLog() const; - - enum class ApplicationType : uint8_t - { - KEEPER, - SERVER, - }; - - void setApplicationType(ApplicationType) {} - ApplicationType getApplicationType() const { return ApplicationType::KEEPER; } - - IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; -#if USE_LIBURING - IOUringReader & getIOUringReader() const; -#endif - std::shared_ptr getAsyncReadCounters() const; - ThreadPool & getThreadPoolWriter() const; - - ThrottlerPtr getRemoteReadThrottler() const; - ThrottlerPtr getRemoteWriteThrottler() const; - - ThrottlerPtr getLocalReadThrottler() const; - ThrottlerPtr getLocalWriteThrottler() const; - - ReadSettings getReadSettings() const; - - /// Resource management related - ResourceManagerPtr getResourceManager() const; - ClassifierPtr getWorkloadClassifier() const; - - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - const S3SettingsByEndpoint & getStorageS3Settings() const; - - const String & getUserName() const { static std::string user; return user; } - - const ServerSettings & getServerSettings() const; - - bool hasTraceCollector() const; - - bool isBackgroundOperationContext() const; -}; - -} diff --git a/src/Coordination/Standalone/Settings.cpp b/src/Coordination/Standalone/Settings.cpp deleted file mode 100644 index 12a7a42ffac..00000000000 --- a/src/Coordination/Standalone/Settings.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -namespace DB -{ - -IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) - -std::vector Settings::getAllRegisteredNames() const -{ - std::vector all_settings; - for (const auto & setting_field : all()) - { - all_settings.push_back(setting_field.getName()); - } - return all_settings; -} - -void Settings::set(std::string_view name, const Field & value) -{ - BaseSettings::set(name, value); -} - - -} diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp deleted file mode 100644 index fc78233d9dc..00000000000 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -namespace DB -{ - -void CurrentThread::detachFromGroupIfNotDetached() -{ -} - -void CurrentThread::attachToGroup(const ThreadGroupPtr &) -{ -} - -void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) -{ -} - -} diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index caa8b3fdffd..7d094e2a107 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -380,15 
+380,6 @@ void SettingFieldString::readBinary(ReadBuffer & in) *this = std::move(str); } -/// Unbeautiful workaround for clickhouse-keeper standalone build ("-DBUILD_STANDALONE_KEEPER=1"). -/// In this build, we don't build and link library dbms (to which SettingsField.cpp belongs) but -/// only build SettingsField.cpp. Further dependencies, e.g. DataTypeString and DataTypeMap below, -/// require building of further files for clickhouse-keeper. To keep dependencies slim, we don't do -/// that. The linker does not complain only because clickhouse-keeper does not call any of below -/// functions. A cleaner alternative would be more modular libraries, e.g. one for data types, which -/// could then be linked by the server and the linker. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} String SettingFieldMap::toString() const @@ -428,42 +419,6 @@ void SettingFieldMap::readBinary(ReadBuffer & in) *this = map; } -#else - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} -String SettingFieldMap::toString() const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - - -SettingFieldMap & SettingFieldMap::operator =(const Field &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::parseFromString(const String &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::writeBinary(WriteBuffer &) const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::readBinary(ReadBuffer &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -#endif - namespace { char stringToChar(const String & str) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 19809348921..266141815e3 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -247,12 +247,6 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD -#define NORETURN [[noreturn]] -#else -#define NORETURN -#endif - struct SettingFieldMap { public: @@ -269,11 +263,11 @@ public: operator const Map &() const { return value; } /// NOLINT explicit operator Field() const { return value; } - NORETURN String toString() const; - NORETURN void parseFromString(const String & str); + String toString() const; + void parseFromString(const String & str); - NORETURN void writeBinary(WriteBuffer & out) const; - NORETURN void readBinary(ReadBuffer & in); + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); }; #undef NORETURN diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b2c425ceb79..48f76769a09 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -502,9 +502,7 @@ private: if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD Context::getGlobalContextInstance()->handleCrash(); -#endif /// Send crash report to developers (if configured) if (sig != SanitizerTrap) @@ -533,8 +531,6 @@ private: } } - /// ClickHouse Keeper does not link to some parts of Settings. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. 
if (!query_id.empty()) { @@ -549,7 +545,6 @@ private: LOG_FATAL(log, "Changed settings: {}", changed_settings); } } -#endif /// When everything is done, we will try to send these error messages to the client. if (thread_ptr) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9479dd65730..c51a1100639 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -19,7 +19,7 @@ #include "config.h" #include -#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_SENTRY # include # include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index c77709c27eb..bb9761a3905 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -78,7 +78,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c std::unique_ptr buf; -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -96,7 +95,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c /* read_until_position */std::nullopt, cache_log); } -#endif /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the /// former doesn't support seeks. diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 59cc82d8c81..5c45a258806 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -195,7 +195,6 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -204,7 +203,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. 
NameSet getCacheLayersNames() const override; -#endif bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..56d5d11ef8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -222,11 +222,7 @@ ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD return Context::getGlobalContextInstance()->getServerSettings().storage_metadata_write_full_object_key; -#else - return false; -#endif } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index ab7c2069b43..a690ecd2757 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -2,9 +2,7 @@ #include #include #include -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include -#endif #include #include @@ -135,7 +133,6 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) }); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { factory.registerMetadataStorageType("web", []( @@ -147,7 +144,6 @@ void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & fa return std::make_shared(assert_cast(*object_storage)); }); } -#endif void registerMetadataStorages() { @@ -155,9 +151,7 @@ void registerMetadataStorages() registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); registerPlainRewritableMetadataStorage(factory); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1bf8250adff..5698d2ad588 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,19 +7,17 @@ #include #include #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS #include #include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE #include #include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include #include -#endif #include #include #include @@ -284,7 +282,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS void registerHDFSObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType( @@ -309,7 +307,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) } #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE void registerAzureObjectStorage(ObjectStorageFactory & factory) { auto creator = []( @@ -333,7 +331,6 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) } #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerWebObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType("web", []( @@ -381,7 +378,6 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) 
factory.registerObjectStorageType("local_blob_storage", creator); factory.registerObjectStorageType("local", creator); } -#endif void registerObjectStorages() { @@ -393,18 +389,16 @@ void registerObjectStorages() registerS3PlainRewritableObjectStorage(factory); #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS registerHDFSObjectStorage(factory); #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE registerAzureObjectStorage(factory); #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerWebObjectStorage(factory); registerLocalObjectStorage(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h index 6dddc227ade..5cf1fbef2ab 100644 --- a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h +++ b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h @@ -1,14 +1,14 @@ #pragma once +#include "config.h" + #if USE_AWS_S3 # include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE # include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD -# include -#endif +#include #include namespace ProfileEvents @@ -42,7 +42,7 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -53,7 +53,6 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -62,6 +61,5 @@ inline MetadataStorageMetrics MetadataStorageMetrics::creategetBlobStorageLog()) { auto log_writer = std::make_shared(std::move(blob_storage_log)); @@ -67,7 +66,6 @@ BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) return log_writer; } -#endif return {}; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f9b91a45978..d3f152b7a67 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,7 +1,5 @@ #pragma once -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - #include #include #include @@ -1451,9 +1449,3 @@ struct HTTPContext : public IHTTPContext }; } - -#else - -#include - -#endif diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..1f3e038a1f5 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,9 +18,6 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone. 
const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 8d14a849894..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index dd11c1dfc58..76a6776ed9c 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif From 0b24a416b5f50bb4416d4d95dddd20a1e333b569 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 10:31:29 +0000 Subject: [PATCH 200/439] Also throw error if bit shift positions is greater than the bit width of value --- src/Functions/bitShiftLeft.cpp | 19 ++++++++++------- src/Functions/bitShiftRight.cpp | 19 +++++++++-------- ...ror_for_negative_shift_positions.reference | 0 ...ows_error_for_negative_shift_positions.sql | 7 ------- ...t_throws_error_for_out_of_bounds.reference | 1 + ...t_shift_throws_error_for_out_of_bounds.sql | 21 +++++++++++++++++++ 6 files changed, 43 insertions(+), 24 deletions(-) delete mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference delete mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c3f5de628aa..9d32e5b5ca4 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -7,6 +7,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int ARGUMENT_OUT_OF_BOUND; } namespace @@ -24,8 +25,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); + else if (b < 0 || b > B(8 * sizeof(A))) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -37,13 +38,15 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big 
integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; + size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.push_back(0); @@ -104,14 +107,14 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index b53485c45f5..13b210a4f63 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -8,7 +8,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; } namespace @@ -26,8 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); + else if (b < 0 || b > B(8 * sizeof(A))) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -54,13 +54,14 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; + size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { /// insert default value out_vec.push_back(0); @@ -93,14 +94,14 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a 
positive value"); else { UInt8 word_size = 8; size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql deleted file mode 100644 index 659d03d1951..00000000000 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql +++ /dev/null @@ -1,7 +0,0 @@ -SELECT bitShiftRight(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftRight('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftRight(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } - -SELECT bitShiftLeft(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftLeft('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftLeft(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql new file mode 100644 index 00000000000..9cfc6f00b91 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -0,0 +1,21 @@ +SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; +SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; +SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; +SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; +SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT 
bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT 'OK'; \ No newline at end of file From 0fed338ac26bb7a974fe8cfe8ac1b6dacbc3f4df Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 11:03:25 +0000 Subject: [PATCH 201/439] Fix other bit shift tests after out of bounds check --- ...t_shift_right_for_string_integer.reference | 39 ------------------- ...016_bit_shift_right_for_string_integer.sql | 19 ++++----- ...it_shift_left_for_string_integer.reference | 39 ------------------- ...2017_bit_shift_left_for_string_integer.sql | 19 ++++----- ...t_shift_throws_error_for_out_of_bounds.sql | 6 --- 5 files changed, 16 insertions(+), 106 deletions(-) diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference index e6a2b2b6aaf..ab832478da0 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00000001 39 Hello 00000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000001 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0010010000110010 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 
00000000001001000011001010110110000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 8 Hel 0100100001100101 8 Hel 0100100001100101 8 Hel 0100100001100101 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hello 0000000010010000110010101101100011011000 8 Hello 01001000011001010110110001101100 9 Hello 00100100001100101011011000110110 --1 Hel 0 Hel 010010000110010101101100 1 Hel 001001000011001010110110 7 Hel 000000001001000011001010 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 00100100 23 Hel 00000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 FixedString ConstVector --1 Hello\0\0\0\0\0 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 9 Hello\0\0\0\0\0 00000000001001000011001010110110001101100011011110000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 00100100001100101011011000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql index 0ee04e408ba..40fccbc89e6 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftRight('Hello', 37)); SELECT 38,'Hello',bin(bitShiftRight('Hello', 38)); SELECT 39,'Hello',bin(bitShiftRight('Hello', 39)); SELECT 40,'Hello',bin(bitShiftRight('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftRight('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftRight('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftRight(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 1 SELECT 78,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), 
bin(bitShiftRight(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_right_string_integer; CREATE TABLE test_bit_shift_right_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); +SELECT bin(bitShiftRight('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftRight(str, 7)) as string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftRight(str, 8)) as string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftRight('Hello', id)) as string_res FROM test_bit_shift_right_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer; +SELECT id as shift_right_bit,'Hel' as 
arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftRight(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftRight(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference index ff5a09c0d48..a20c44bbb9a 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00010010000110010101101100011011000110111100000000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0000000010010000110010101101100000000000 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 
01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hello 
001001000011001010110110001101100011011110000000 8 Hello 010010000110010101101100011011000110111100000000 9 Hello 00000000100100001100101011011000110110001101111000000000 --1 Hel 0 Hel 010010000110010101101100 1 Hel 00000000100100001100101011011000 7 Hel 00100100001100101011011000000000 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 000000001001000011001010110110000000000000000000 23 Hel 001001000011001010110110000000000000000000000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 9 Hello\0\0\0\0\0 11001010110110001101100011011110000000000000000000000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 10010000110010101101100000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql index 5c7a9901dae..a8e66eda281 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 
37,'Hello',bin(bitShiftLeft('Hello', 37)); SELECT 38,'Hello',bin(bitShiftLeft('Hello', 38)); SELECT 39,'Hello',bin(bitShiftLeft('Hello', 39)); SELECT 40,'Hello',bin(bitShiftLeft('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftLeft('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftLeft('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftLeft(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10 SELECT 78,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_left_string_integer; CREATE TABLE test_bit_shift_left_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); +SELECT bin(bitShiftLeft('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 7)) as string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 8)) as string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 7)) as fixed_string_res 
FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftLeft('Hello', id)) as string_res FROM test_bit_shift_left_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftLeft(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftLeft(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql index 9cfc6f00b91..a1a246593d8 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -1,21 +1,15 @@ SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT 'OK'; \ No newline at end of file From bb0b93f77db0a3e3618523b55766a7e61700bc55 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 11:11:16 +0000 Subject: [PATCH 202/439] Change wrong implementations of copy and move --- programs/disks/CommandCopy.cpp | 54 ++++++++++++++++++++++++++++++-- programs/disks/CommandMkDir.cpp | 6 ++-- programs/disks/CommandMove.cpp | 33 +++++++++++++++++-- programs/disks/CommandRemove.cpp | 27 ++++++++++++++-- 
programs/disks/DisksClient.cpp | 2 +- programs/disks/ICommand.h | 16 ++++++++++ 6 files changed, 126 insertions(+), 12 deletions(-) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 4ba8a9ecbc2..0938e88a7f5 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,4 +1,5 @@ #include +#include "Common/Exception.h" #include #include "DisksClient.h" #include "ICommand.h" @@ -17,7 +18,8 @@ public: "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( - "path-to", po::value(), "path to which copy is executed (mandatory, positional)"); + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive", "recursively copy the directory"); positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } @@ -28,9 +30,55 @@ public: auto disk_to = getDiskWithPath(client, options, "disk-to"); String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - disk_from.getDisk()->copyDirectoryContent( - path_from, disk_to.getDisk(), path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if (!disk_from.getDisk()->exists(path_from)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); + } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); + + if (disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 3ea6df5622d..535936480d9 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -13,14 +13,14 @@ public: { command_name = "mkdir"; description = "Creates a directory"; - options_description.add_options()("recursive", "recursively create directories")( - "path", po::value(), "the path of listing (mandatory, positional)"); + options_description.add_options()("parents", 
"recursively create directories")( + "path", po::value(), "the path on which directory should be created (mandatory, positional)"); positional_options_description.add("path", 1); } void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - bool recursive = options.count("recursive"); + bool recursive = options.count("parents"); auto disk = client.getCurrentDiskWithPath(); String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 23144df3d35..6080fcf6811 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -25,9 +25,38 @@ public: String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); if (disk.getDisk()->isFile(path_from)) + { disk.getDisk()->moveFile(path_from, path_to); - else - disk.getDisk()->moveDirectory(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); + } } }; diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index b322fb2701f..d508645fc65 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,4 +1,5 @@ #include +#include "Common/Exception.h" #include "ICommand.h" namespace DB @@ -10,8 +11,9 @@ public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists"; - options_description.add_options()("path", po::value(), "path from which we copy (mandatory, positional)"); + description = "Remove file or directory. 
Throws exception if file doesn't exists"; + options_description.add_options()("path", po::value(), "path from which we copy (mandatory, positional)")( + "recursive", "recursively removes the directory (required to remove a directory)"); positional_options_description.add("path", 1); } @@ -19,7 +21,26 @@ public: { auto disk = client.getCurrentDiskWithPath(); const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); - disk.getDisk()->removeRecursive(path); + bool recursive = options.count("recursive"); + if (!disk.getDisk()->exists(path)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName()); + } + else if (disk.getDisk()->isDirectory(path)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path); + } + else + { + disk.getDisk()->removeRecursive(path); + } + } + else + { + disk.getDisk()->removeFileIfExists(path); + } } }; diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index 379c87e4f2f..7e36c7911ab 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -20,7 +20,7 @@ DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(di { if (!fs::path{path_.value()}.is_absolute()) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); } path = path_.value(); } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index 4b0ec731966..6faf90e2b52 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -100,6 +100,22 @@ protected: DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); + String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to) + { + if (!disk_to.getDisk()->isDirectory(path_to)) + { + return path_to; + } + String copied_path_from = path_from; + if (copied_path_from.ends_with('/')) + { + copied_path_from.pop_back(); + } + String plain_filename = fs::path(copied_path_from).filename(); + + return fs::path{path_to} / plain_filename; + } + public: String command_name; From d0506f0214e949426c41fea2d9ebd79813422fd8 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 12:23:54 +0000 Subject: [PATCH 203/439] Fix more tests One of tests actually uncovered a casting error :) --- src/Functions/bitShiftLeft.cpp | 2 +- src/Functions/bitShiftRight.cpp | 2 +- tests/queries/0_stateless/02366_kql_func_binary.reference | 3 --- tests/queries/0_stateless/02366_kql_func_binary.sql | 3 --- .../0_stateless/02766_bitshift_with_const_arguments.sql | 2 +- utils/check-style/check-style | 1 - 6 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 9d32e5b5ca4..3d496296ba9 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -25,7 +25,7 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0 || b > B(8 * sizeof(A))) + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr 
(is_big_int_v) return static_cast(a) << static_cast(b); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 13b210a4f63..0b41493fc6d 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -26,7 +26,7 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0 || b > B(8 * sizeof(A))) + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference index 6276cd6d867..360c1aa9899 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -1,7 +1,4 @@ -- binary functions 4 7 -1 -1 -1 7 3 1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql index 824022b564c..687f3afb5ee 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -1,8 +1,5 @@ set dialect='kusto'; print ' -- binary functions'; print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; print binary_xor(2, 5), bitset_count_ones(42); print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 6b2961f0555..91e8624057c 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } DROP TABLE t0; DROP TABLE t1; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 31972894c3d..380656cd1ca 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -94,7 +94,6 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values[i] ErrorCodes::getErrorCodeByName ErrorCodes::Value - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} From fee7da5ed3aec7b852d15f13485789f7deeda50f Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 12:46:17 +0000 Subject: [PATCH 204/439] Corrected tests --- programs/disks/CommandCopy.cpp | 8 ++++++-- programs/disks/CommandMove.cpp | 6 +++++- programs/disks/CommandRemove.cpp | 2 +- tests/integration/test_disks_app_func/test.py | 2 +- 
.../test_disks_app_interactive/test.py | 18 +++++++++++------- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 0938e88a7f5..eef87535e51 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -19,7 +19,7 @@ public: "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( - "recursive", "recursively copy the directory"); + "recursive,r", "recursively copy the directory"); positional_options_description.add("path-from", 1); positional_options_description.add("path-to", 1); } @@ -34,7 +34,11 @@ public: if (!disk_from.getDisk()->exists(path_from)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk '{}': No such file or directory", + path_from, + disk_from.getDisk()->getName()); } else if (disk_from.getDisk()->isFile(path_from)) { diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 6080fcf6811..22c1f851174 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -55,7 +55,11 @@ public: } else if (!disk.getDisk()->exists(path_from)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot stat '{}': No such file or directory", path_from); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk: '{}': No such file or directory", + path_from, + disk.getDisk()->getName()); } } }; diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index d508645fc65..caa70905bef 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -13,7 +13,7 @@ public: command_name = "remove"; description = "Remove file or directory. Throws exception if file doesn't exists"; options_description.add_options()("path", po::value(), "path from which we copy (mandatory, positional)")( - "recursive", "recursively removes the directory (required to remove a directory)"); + "recursive,r", "recursively removes the directory (required to remove a directory)"); positional_options_description.add("path", 1); } diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 34e45a9d626..56ea5c8846a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -129,7 +129,7 @@ def test_disks_app_func_cp(started_cluster): "/usr/bin/clickhouse", "disks", "--query", - "copy --disk-from test1 --disk-to test2 . .", + "copy --recursive --disk-from test1 --disk-to test2 . 
.", ] ) diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py index 79ffc3001a5..ca4ba5d9065 100644 --- a/tests/integration/test_disks_app_interactive/test.py +++ b/tests/integration/test_disks_app_interactive/test.py @@ -146,18 +146,22 @@ class DisksClient(object): path_to, disk_from: Optional[str] = None, disk_to: Optional[str] = None, + recursive: bool = False, ): disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + recursive_tag = "--recursive" if recursive else "" + self.execute_query( - f"copy {path_from} {path_to} {disk_from_option} {disk_to_option}" + f"copy {recursive_tag} {path_from} {path_to} {disk_from_option} {disk_to_option}" ) def move(self, path_from: str, path_to: str): self.execute_query(f"move {path_from} {path_to}") - def rm(self, path: str): - self.execute_query(f"rm {path}") + def rm(self, path: str, recursive: bool = False): + recursive_tag = "--recursive" if recursive else "" + self.execute_query(f"rm {recursive_tag} {path}") def mkdir(self, path: str, recursive: bool = False): recursive_adding = "--recursive " if recursive else "" @@ -260,7 +264,7 @@ def test_disks_app_interactive_list_directories_default(): "./.dir3/dir31": [], "./.dir3/.dir32": [], } - client.rm("dir2") + client.rm("dir2", recursive=True) traversed_dir = client.ls(".", recursive=True, show_hidden=True) assert traversed_dir == { ".": [".dir3", "dir1"], @@ -279,8 +283,8 @@ def test_disks_app_interactive_list_directories_default(): "./dir1/dir11": [], "./dir1/dir13": [], } - client.rm("dir1") - client.rm(".dir3") + client.rm("dir1", recursive=True) + client.rm(".dir3", recursive=True) assert client.ls(".", recursive=True, show_hidden=False) == {".": []} @@ -304,7 +308,7 @@ def test_disks_app_interactive_cp_and_read(): assert read_text == initial_text os.remove("a.txt") client.rm("a.txt") - client.rm("/dir1") + client.rm("/dir1", recursive=True) def test_disks_app_interactive_test_move_and_write(): From 1a952591821a9f6969a09875c5403e8ac38e7d2a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 15:17:41 +0200 Subject: [PATCH 205/439] Add extra profiling helpers for Keeper --- src/Common/ProfileEvents.cpp | 7 ++ src/Common/ZooKeeper/ZooKeeperCommon.cpp | 36 ++++++----- src/Common/ZooKeeper/ZooKeeperCommon.h | 32 ++++----- src/Coordination/CoordinationSettings.h | 5 +- src/Coordination/KeeperConstants.cpp | 7 ++ src/Coordination/KeeperDispatcher.cpp | 17 ++++- src/Coordination/KeeperStateMachine.cpp | 82 +++++++++++++++--------- src/Coordination/KeeperStateMachine.h | 7 +- src/Coordination/KeeperStorage.cpp | 30 +++++++++ src/Coordination/SnapshotableHashTable.h | 1 + src/Server/KeeperTCPHandler.cpp | 19 ++++-- 11 files changed, 168 insertions(+), 75 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..a1058a879bd 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -611,6 +611,13 @@ The server successfully detected this situation and will download merged part fr M(KeeperPacketsReceived, "Packets received by keeper server") \ M(KeeperRequestTotal, "Total requests number on keeper server") \ M(KeeperLatency, "Keeper latency") \ + M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \ + M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \ + M(KeeperPreprocessElapsedMicroseconds, "Keeper 
preprocessing latency for a single reuquest") \ + M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \ + M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \ + M(KeeperBatchMaxCount, "Number of times the size of batch was limited by the amount") \ + M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \ M(KeeperCommits, "Number of successful commits") \ M(KeeperCommitsFailed, "Number of failed commits") \ M(KeeperSnapshotCreations, "Number of snapshots creations")\ diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 48bb510e589..dff14f74681 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -9,7 +9,6 @@ #include #include #include -#include namespace Coordination @@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const Coordination::write(buf.str(), out); } -std::string ZooKeeperRequest::toString() const +std::string ZooKeeperRequest::toString(bool short_format) const { return fmt::format( "XID = {}\n" @@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "Additional info:\n{}", xid, getOpNum(), - toStringImpl()); + toStringImpl(short_format)); } void ZooKeeperRequest::write(WriteBuffer & out) const @@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in) Coordination::read(path, in); } -std::string ZooKeeperSyncRequest::toStringImpl() const +std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperReconfigRequest::toStringImpl() const +std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", @@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in) Coordination::read(data, in); } -std::string ZooKeeperAuthRequest::toStringImpl() const +std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "type = {}\n" @@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in) is_sequential = true; } -std::string ZooKeeperCreateRequest::toStringImpl() const +std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const Coordination::write(version, out); } -std::string ZooKeeperRemoveRequest::toStringImpl() const +std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperExistsRequest::toStringImpl() const +std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperGetRequest::toStringImpl() const +std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string 
ZooKeeperSetRequest::toStringImpl() const +std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperListRequest::toStringImpl() const +std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) list_request_type = static_cast(read_request_type); } -std::string ZooKeeperFilteredListRequest::toStringImpl() const +std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetACLRequest::toStringImpl() const +std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const Coordination::write(path, out); } -std::string ZooKeeperGetACLRequest::toStringImpl() const +std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperCheckRequest::toStringImpl() const +std::string ZooKeeperCheckRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in) } } -std::string ZooKeeperMultiRequest::toStringImpl() const +std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const { + if (short_format) + return fmt::format("Subrequests size = {}", requests.size()); + auto out = fmt::memory_buffer(); for (const auto & request : requests) { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 490c2dce4f8..fd6ec3cd375 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request /// Writes length, xid, op_num, then the rest. 
void write(WriteBuffer & out) const; - std::string toString() const; + std::string toString(bool short_format = false) const; virtual void writeImpl(WriteBuffer &) const = 0; virtual void readImpl(ReadBuffer &) = 0; - virtual std::string toStringImpl() const { return ""; } + virtual std::string toStringImpl(bool /*short_format*/) const { return ""; } static std::shared_ptr read(ReadBuffer & in); @@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Sync; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Reconfig; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Auth; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? 
OpNum::CreateIfNotExists : OpNum::Create; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Remove; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Exists; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Get; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Set; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::List; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest OpNum getOpNum() const override { return OpNum::FilteredList; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } }; @@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? 
OpNum::CheckNotExists : OpNum::Check; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::SetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::GetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override; diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index a32552616ee..e7ae1f86d2e 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -58,7 +58,10 @@ struct Settings; M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ - M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) + M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ + M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_cpu_threshold_ms, 5000, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 51bf037c1c9..ff26b3171ea 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -238,6 +238,13 @@ M(KeeperPacketsReceived) \ M(KeeperRequestTotal) \ M(KeeperLatency) \ + M(KeeperTotalElapsedMicroseconds) \ + 
M(KeeperProcessElapsedMicroseconds) \ + M(KeeperPreprocessElapsedMicroseconds) \ + M(KeeperStorageLockWaitMicroseconds) \ + M(KeeperCommitWaitElapsedMicroseconds) \ + M(KeeperBatchMaxCount) \ + M(KeeperBatchMaxTotalSize) \ M(KeeperCommits) \ M(KeeperCommitsFailed) \ M(KeeperSnapshotCreations) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4389da082d..925ac9a4efe 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -31,6 +31,13 @@ namespace CurrentMetrics extern const Metric KeeperOutstandingRequets; } +namespace ProfileEvents +{ + extern const Event KeeperCommitWaitElapsedMicroseconds; + extern const Event KeeperBatchMaxCount; + extern const Event KeeperBatchMaxTotalSize; +} + using namespace std::chrono_literals; namespace DB @@ -119,6 +126,7 @@ void KeeperDispatcher::requestThread() auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size; + size_t max_batch_size = coordination_settings->max_requests_batch_size; /// The code below do a very simple thing: batch all write (quorum) requests into vector until /// previous write batch is not finished or max_batch size achieved. The main complexity goes from @@ -188,7 +196,6 @@ void KeeperDispatcher::requestThread() return false; }; - size_t max_batch_size = coordination_settings->max_requests_batch_size; while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request && current_batch_bytes_size < max_batch_bytes_size && try_get_request()) ; @@ -225,6 +232,12 @@ void KeeperDispatcher::requestThread() /// Process collected write requests batch if (!current_batch.empty()) { + if (current_batch.size() == max_batch_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1); + + if (current_batch_bytes_size == max_batch_bytes_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1); + LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size); auto result = server->putRequestBatch(current_batch); @@ -243,6 +256,8 @@ void KeeperDispatcher::requestThread() /// If we will execute read or reconfig next, we have to process result now if (execute_requests_after_write) { + Stopwatch watch; + SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds())); if (prev_result) result_buf = forceWaitAndProcessResult( prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e4d661dfe17..a12d8a50ac3 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -1,12 +1,14 @@ #include #include +#include +#include +#include #include #include -#include #include -#include #include #include +#include #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include namespace ProfileEvents @@ -31,6 +32,7 @@ namespace ProfileEvents extern const Event KeeperSnapshotApplysFailed; extern const Event KeeperReadSnapshot; extern const Event KeeperSaveSnapshot; + extern const Event KeeperStorageLockWaitMicroseconds; } namespace DB @@ -153,6 +155,14 @@ void assertDigest( } +std::unique_lock KeeperStateMachine::getStorageLock() const +{ + 
Stopwatch watch; + std::unique_lock lock(storage_and_responses_lock); + ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); + return lock; +} + nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) { auto result = nuraft::buffer::alloc(sizeof(log_idx)); @@ -272,7 +282,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) return true; - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); if (storage->isFinalized()) return false; @@ -302,7 +312,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) { - std::lock_guard _(storage_and_responses_lock); + auto lock = getStorageLock(); KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); if (!responses_queue.push(response)) { @@ -391,7 +401,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [this](const KeeperStorage::ResponseForSession& response) + auto try_push = [&](const KeeperStorage::ResponseForSession& response) { if (!responses_queue.push(response)) { @@ -400,6 +410,17 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); } + + using namespace std::chrono; + uint64_t elapsed = request_for_session->time - duration_cast(system_clock::now().time_since_epoch()).count(); + if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) + { + LOG_INFO( + log, + "Total time to process a request took too long ({}ms).\nRequest info: {}", + elapsed, + request_for_session->request->toString(/*short_format=*/true)); + } }; try @@ -417,7 +438,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n response_for_session.session_id = -1; response_for_session.response = response; - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); session_id = storage->getSessionID(session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); response->session_id = session_id; @@ -426,12 +447,13 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n else { if (op_num == Coordination::OpNum::Close) + { std::lock_guard lock(request_cache_mutex); parsed_request_cache.erase(request_for_session->session_id); } - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -482,7 +504,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } { /// deserialize and apply snapshot to storage - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) @@ -534,7 +556,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession if (request_for_session.request->getOpNum() == 
Coordination::OpNum::SessionID) return; - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -561,7 +583,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -623,7 +645,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } { /// Destroy snapshot with lock - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -764,7 +786,7 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) @@ -774,97 +796,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi void KeeperStateMachine::shutdownStorage() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->finalize(); } std::vector KeeperStateMachine::getDeadSessions() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getDeadSessions(); } int64_t KeeperStateMachine::getNextZxid() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNextZXID(); } KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNodesDigest(false); } uint64_t KeeperStateMachine::getLastProcessedZxid() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getZXID(); } uint64_t KeeperStateMachine::getNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getNodesCount(); } uint64_t KeeperStateMachine::getTotalWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getTotalWatchesCount(); } uint64_t KeeperStateMachine::getWatchedPathsCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getWatchedPathsCount(); } uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getSessionsWithWatchesCount(); } uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getTotalEphemeralNodesCount(); } uint64_t 
KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getSessionWithEphemeralNodesCount(); } void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpWatches(buf); } void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpWatchesByPath(buf); } void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); storage->dumpSessionsAndEphemerals(buf); } uint64_t KeeperStateMachine::getApproximateDataSize() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getApproximateDataSize(); } uint64_t KeeperStateMachine::getKeyArenaSize() const { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); return storage->getArenaDataSize(); } @@ -905,7 +927,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const void KeeperStateMachine::recalculateStorageStats() { - std::lock_guard lock(storage_and_responses_lock); + auto lock = getStorageLock(); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index ee6109f0a17..5b166e11569 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -131,6 +131,8 @@ public: void reconfigure(const KeeperStorage::RequestForSession& request_for_session); private: + std::unique_lock getStorageLock() const; + CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -139,7 +141,7 @@ private: nuraft::ptr latest_snapshot_buf = nullptr; /// Main state machine logic - KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); + KeeperStoragePtr storage; /// Save/Load and Serialize/Deserialize logic for snapshots. 
KeeperSnapshotManager snapshot_manager; @@ -183,7 +185,6 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; KeeperStorage::ResponseForSession processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session) - TSA_REQUIRES(storage_and_responses_lock); + const KeeperStorage::RequestForSession& request_for_session); }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d6225baaf4c..1e53a664d1b 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -40,6 +40,8 @@ namespace ProfileEvents extern const Event KeeperGetRequest; extern const Event KeeperListRequest; extern const Event KeeperExistsRequest; + extern const Event KeeperPreprocessElapsedMicroseconds; + extern const Event KeeperProcessElapsedMicroseconds; } namespace DB @@ -2309,6 +2311,20 @@ void KeeperStorage::preprocessRequest( std::optional digest, int64_t log_idx) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Preprocessing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, watch.elapsedMicroseconds()); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); @@ -2409,6 +2425,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( bool check_acl, bool is_local) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Processing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, watch.elapsedMicroseconds()); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 70858930115..5f2b14e17b0 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 4612e2e9fa8..47064b467e7 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -13,11 +13,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -30,6 +28,11 @@ #include #endif +namespace ProfileEvents +{ + extern const Event KeeperTotalElapsedMicroseconds; +} + namespace DB { @@ -411,12 +414,12 @@ void KeeperTCPHandler::runImpl() keeper_dispatcher->registerSession(session_id, response_callback); Stopwatch logging_stopwatch; + auto operation_max_ms = keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_connection_operation_threshold_ms; auto log_long_operation = [&](const String & operation) { - constexpr UInt64 operation_max_ms = 500; auto elapsed_ms = logging_stopwatch.elapsedMilliseconds(); if (operation_max_ms < elapsed_ms) - LOG_TEST(log, "{} for session {} took {} ms", 
operation, session_id, elapsed_ms); + LOG_INFO(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms); logging_stopwatch.restart(); }; @@ -611,11 +614,13 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response /// update statistics ignoring watch response and heartbeat. if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { - Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - conn_stats.updateLatency(elapsed); + Int64 elapsed = (Poco::Timestamp() - operations[response->xid]); + ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed); + Int64 elapsed_ms = elapsed / 1000; + conn_stats.updateLatency(elapsed_ms); operations.erase(response->xid); - keeper_dispatcher->updateKeeperStatLatency(elapsed); + keeper_dispatcher->updateKeeperStatLatency(elapsed_ms); last_op.set(std::make_unique(LastOp{ .name = Coordination::toString(response->getOpNum()), From 9e92aed2de6594524070bb6dac8e11d07616c954 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 13:24:13 +0000 Subject: [PATCH 206/439] Correct style check --- programs/disks/CommandCopy.cpp | 6 ++++++ programs/disks/CommandMove.cpp | 6 ++++++ programs/disks/CommandRemove.cpp | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index eef87535e51..62eb1cad6ab 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -7,6 +7,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + class CommandCopy final : public ICommand { public: diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 22c1f851174..40b698c9340 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -4,6 +4,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + class CommandMove final : public ICommand { public: diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index caa70905bef..3b6ad018fb8 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -5,6 +5,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + class CommandRemove final : public ICommand { public: From 7361407ea50e6cc22ced76527634b3404a99642c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 15:25:00 +0200 Subject: [PATCH 207/439] Fix test --- src/Coordination/CoordinationSettings.cpp | 17 +++++++++++++++++ .../test_keeper_four_word_command/test.py | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 05f691ca76b..d72d39fd7e1 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const writeText("async_replication=", buf); write_bool(coordination_settings->async_replication); + + writeText("latest_logs_cache_size_threshold=", buf); + write_int(coordination_settings->latest_logs_cache_size_threshold); + writeText("commit_logs_cache_size_threshold=", buf); + write_int(coordination_settings->commit_logs_cache_size_threshold); + + writeText("disk_move_retries_wait_ms=", buf); + write_int(coordination_settings->disk_move_retries_wait_ms); + writeText("disk_move_retries_during_init=", buf); + 
write_int(coordination_settings->disk_move_retries_during_init); + + writeText("log_slow_total_threshold_ms=", buf); + write_int(coordination_settings->log_slow_total_threshold_ms); + writeText("log_slow_cpu_threshold_ms=", buf); + write_int(coordination_settings->log_slow_cpu_threshold_ms); + writeText("log_slow_connection_operation_threshold_ms=", buf); + write_int(coordination_settings->log_slow_connection_operation_threshold_ms); } KeeperConfigurationAndSettingsPtr diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 44b2b50673a..a3a059c1dcb 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -293,6 +293,16 @@ def test_cmd_conf(started_cluster): assert result["configuration_change_tries_count"] == "20" assert result["async_replication"] == "true" + + assert result["latest_logs_cache_size_threshold"] == "1073741824" + assert result["commit_logs_cache_size_threshold"] == "524288000" + + assert result["disk_move_retries_wait_ms"] == "1000" + assert result["disk_move_retries_during_init"] == "100" + + assert result["log_slow_total_threshold_ms"] == "5000" + assert result["log_slow_cpu_threshold_ms"] == "5000" + assert result["log_slow_connection_operation_threshold_ms"] == "1000" finally: close_keeper_socket(client) From 5e7a8245c467dbc40344bda67760ad1d33535994 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 15:40:49 +0200 Subject: [PATCH 208/439] Fix memory leak --- .../ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 12 +++++++----- .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 3 --- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 3b6cbca5d46..bc64ef15cf1 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -51,8 +51,10 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeltaLakeMetadata::Impl +struct DeltaLakeMetadataImpl { + using ConfigurationPtr = DeltaLakeMetadata::ConfigurationPtr; + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ContextPtr context; @@ -61,7 +63,7 @@ struct DeltaLakeMetadata::Impl * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ - Impl(ObjectStoragePtr object_storage_, + DeltaLakeMetadataImpl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) : object_storage(object_storage_) @@ -586,14 +588,14 @@ DeltaLakeMetadata::DeltaLakeMetadata( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : impl(std::make_unique(object_storage_, configuration_, context_)) { - auto result = impl->processMetadataFiles(); + auto impl = DeltaLakeMetadataImpl(object_storage_, configuration_, context_); + auto result = impl.processMetadataFiles(); data_files = result.data_files; schema = result.schema; partition_columns = result.partition_columns; - LOG_TRACE(impl->log, "Found {} data files, {} partition files, schema: {}", + LOG_TRACE(impl.log, "Found {} data files, {} partition files, schema: {}", data_files.size(), partition_columns.size(), schema.toString()); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 926bd1b451d..a479a3dd293 100644 --- 
a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -45,10 +45,7 @@ public: } private: - struct Impl; - const std::shared_ptr impl; mutable Strings data_files; - NamesAndTypesList schema; std::unordered_map column_name_to_physical_name; DataLakePartitionColumns partition_columns; From b0bbc9c8104ae36750f8216a1e197331f5c7d8ab Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 16:00:32 +0200 Subject: [PATCH 209/439] Fix symlinks --- programs/keeper/clickhouse-keeper.cpp | 30 --------------------------- programs/keeper/keeper_main.cpp | 8 ++++++- 2 files changed, 7 insertions(+), 31 deletions(-) delete mode 100644 programs/keeper/clickhouse-keeper.cpp diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp deleted file mode 100644 index f2f91930ac0..00000000000 --- a/programs/keeper/clickhouse-keeper.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include "config_tools.h" - - -int mainEntryClickHouseKeeper(int argc, char ** argv); - -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT -int mainEntryClickHouseKeeperClient(int argc, char ** argv); -#endif - -int main(int argc_, char ** argv_) -{ -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT - - if (argc_ >= 2) - { - /// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK - if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0) - { - argv_[1] = argv_[0]; - return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1); - } - } - - if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client"))) - return mainEntryClickHouseKeeperClient(argc_, argv_); -#endif - - return mainEntryClickHouseKeeper(argc_, argv_); -} diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index a5bc5db7be8..ec9b84ce94b 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -339,7 +339,13 @@ bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) } } - return false; + /// keeper suffix is default which will be used if no other app is detected + if (app_suffix == "keeper") + return false; + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + std::string(app_suffix); + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); } /// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. 
From 12608d2090485e5cd98f82b78c64f7014e7e391c Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 14:22:32 +0000 Subject: [PATCH 210/439] Improve exception text --- src/Functions/bitShiftLeft.cpp | 6 +++--- src/Functions/bitShiftRight.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 3d496296ba9..645672c50e2 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -26,7 +26,7 @@ struct BitShiftLeftImpl if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -43,7 +43,7 @@ struct BitShiftLeftImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) @@ -112,7 +112,7 @@ struct BitShiftLeftImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 0b41493fc6d..2e9182d3fe6 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -27,7 +27,7 @@ struct BitShiftRightImpl if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -59,7 +59,7 @@ struct BitShiftRightImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal 
to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { @@ -99,7 +99,7 @@ struct BitShiftRightImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { From f596f0f66aa571afe3d762d938a812f52a8a9766 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 30 Jun 2024 23:59:08 +0200 Subject: [PATCH 211/439] add restriction for storage join --- src/Storages/StorageJoin.cpp | 5 ++- ...join_strictness_type_restriction.reference | 0 ...orage_join_strictness_type_restriction.sql | 42 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index d12e5b1a20b..eb58b9ec3f8 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -395,11 +395,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, " SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..1c52f79db11 --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } From cc37cbdd176867ab444f22d58a2feb5297ef952c Mon Sep 17 00:00:00 
2001 From: Han Fei Date: Mon, 1 Jul 2024 17:03:27 +0200 Subject: [PATCH 212/439] refine tests --- .../03197_storage_join_strictness_type_restriction.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql index 1c52f79db11..5aa3e4c2e0c 100644 --- a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -15,7 +15,7 @@ CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( @@ -33,10 +33,10 @@ CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } From a1630073258c1790608aa726e428745b08d464e2 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 15:33:34 +0000 Subject: [PATCH 213/439] Change creation criteria of a local disk --- programs/disks/DisksApp.cpp | 2 +- src/Disks/DiskSelector.cpp | 8 +++++--- src/Disks/DiskSelector.h | 8 +++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 392fca8e035..0898b692095 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -484,7 +484,7 @@ int DisksApp::main(const std::vector & /*args*/) auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}, /*create_local=*/true); + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); disk_selector->initialize(config(), config_prefix, global_context, validator); std::vector>> disks_with_path; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index f3b4893e820..f45d12618bf 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -66,9 +66,11 @@ void DiskSelector::initialize( default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } - if (!has_local_disk && create_local) + if (!has_local_disk && (context->getApplicationType() == Context::ApplicationType::DISKS)) + { + throw_away_local_on_update = true; disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); - + } is_initialized = true; } @@ -115,7 +117,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); - if (create_local) + if (throw_away_local_on_update) { old_disks_minus_new_disks.erase(local_disk_name); } diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 8ceb4a58c15..49a1be5cf50 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,10 +20,7 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - explicit DiskSelector(std::unordered_set skip_types_ = {}, bool create_local_ = false) - : skip_types(skip_types_), create_local(create_local_) - { - } + explicit 
DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -53,7 +50,8 @@ private: void assertInitialized() const; const std::unordered_set skip_types; - const bool create_local; + + bool throw_away_local_on_update = false; }; } From adcaf117a1b2987aa47c08d06e5db0c177a191b8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 1 Jul 2024 16:38:39 +0100 Subject: [PATCH 214/439] impl --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeSource.cpp | 9 ++++----- src/Storages/MergeTree/MergeTreeSource.h | 3 ++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0dacdc0b958..b35a2e6f220 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -382,7 +382,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -481,7 +481,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -593,7 +593,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (set_rows_approx) source->addTotalRowsApprox(total_rows); diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...) 
@@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; return processReadResult(processor->read()); } diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 655f0ee6ebe..fc39b4f9b09 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -12,7 +12,7 @@ struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_); ~MergeTreeSource() override; std::string getName() const override; @@ -30,6 +30,7 @@ protected: private: MergeTreeSelectProcessorPtr processor; + const std::string log_name; #if defined(OS_LINUX) struct AsyncReadingState; From ad3170accae51d8b9ad93da6d4e889e162a576ac Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 16:06:17 +0000 Subject: [PATCH 215/439] Correct tests --- .../0_stateless/02802_clickhouse_disks_s3_copy.sh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 1638a3ff9c3..20b02bcba32 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -15,13 +15,10 @@ function run_test_for_disk() echo "$disk" clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy -r $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test" - # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test.copy/test" - clickhouse-disks -C "$config" --disk "$disk" --query "remove $CLICKHOUSE_DATABASE/test.copy" + clickhouse-disks -C "$config" --disk "$disk" --query "remove -r $CLICKHOUSE_DATABASE/test" } function run_test_copy_from_s3_to_s3(){ @@ -31,11 +28,10 @@ function run_test_copy_from_s3_to_s3(){ echo "copy from $disk_src to $disk_dest" clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test --query "copy --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { + clickhouse-disks -C "$config" --log-level test --query "copy -r --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." 
} - clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove $CLICKHOUSE_DATABASE/test.copy/test" - clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove $CLICKHOUSE_DATABASE/test.copy" + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove -r $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy From ef24f517892891b3b298dc33cbb27493fa4ba668 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 1 Jul 2024 19:17:06 +0200 Subject: [PATCH 216/439] Fix handling of SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE with zero timeout Previously if you were using socket without timeout it wasn't able to handle SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE, and even though sockets without timeouts is an odd thing (but it is possible - [1]), it still may be possible somewhere. [1]: https://github.com/ClickHouse/ClickHouse/pull/65917 Signed-off-by: Azat Khuzhin --- .../include/Poco/Net/SecureSocketImpl.h | 5 +-- .../NetSSL_OpenSSL/src/SecureSocketImpl.cpp | 31 +++++++++++++------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h index 49c12b6b45f..890752c52da 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h @@ -235,8 +235,6 @@ namespace Net /// Note that simply closing a socket is not sufficient /// to be able to re-use it again. - Poco::Timespan getMaxTimeout(); - private: SecureSocketImpl(const SecureSocketImpl &); SecureSocketImpl & operator=(const SecureSocketImpl &); @@ -250,6 +248,9 @@ namespace Net Session::Ptr _pSession; friend class SecureStreamSocketImpl; + + Poco::Timespan getMaxTimeoutOrLimit(); + //// Return max(send, receive) if non zero, otherwise maximum timeout }; diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp index efe25f65909..4873d259ae5 100644 --- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp @@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake) if (performHandshake && _pSocket->getBlocking()) { int ret; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { /// SSL record may consist of several TCP packets, @@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake() poco_check_ptr (_pSSL); int rc; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const return 0; } -Poco::Timespan SecureSocketImpl::getMaxTimeout() +Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit() { std::lock_guard lock(_mutex); Poco::Timespan remaining_time = _pSocket->getReceiveTimeout(); Poco::Timespan send_timeout = 
_pSocket->getSendTimeout(); if (remaining_time < send_timeout) remaining_time = send_timeout; + /// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this + /// + /// NOTE: we cannot use INT64_MAX (std::numeric_limits::max()), + /// since it will be later passed to poll() which accept int timeout, and + /// even though poll() accepts milliseconds and Timespan() accepts + /// microseconds, let's use smaller maximum value just to avoid some possible + /// issues, this should be enough anyway (it is ~24 days). + if (remaining_time == 0) + remaining_time = Poco::Timespan(std::numeric_limits::max()); return remaining_time; } bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) { + if (remaining_time == 0) + return false; std::lock_guard lock(_mutex); if (rc <= 0) { @@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_READ: if (_pSocket->getBlocking()) { - /// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket, - /// epoll_wait returns true without waiting for new data even if remaining_time == 0 - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ)) return true; else throw Poco::TimeoutException(); @@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_WRITE: if (_pSocket->getBlocking()) { - /// The same as for SSL_ERROR_WANT_READ - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE)) return true; else throw Poco::TimeoutException(); } break; + /// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in + /// OpenSSL), so this likely dead code, but let's leave it for + /// compatibility with other implementations case SSL_ERROR_SYSCALL: return socketError == POCO_EAGAIN || socketError == POCO_EINTR; default: From 242b553253b9437c44533986e3655f4ff770670f Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 17:35:07 +0000 Subject: [PATCH 217/439] Change unique_ptr creation to shared_ptr --- programs/disks/CommandChangeDirectory.cpp | 2 +- programs/disks/CommandCopy.cpp | 2 +- programs/disks/CommandLink.cpp | 2 +- programs/disks/CommandMkDir.cpp | 2 +- programs/disks/CommandMove.cpp | 2 +- programs/disks/CommandRead.cpp | 2 +- programs/disks/CommandRemove.cpp | 2 +- programs/disks/CommandSwitchDisk.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp index 5c4ce737375..11596f88e4b 100644 --- a/programs/disks/CommandChangeDirectory.cpp +++ b/programs/disks/CommandChangeDirectory.cpp @@ -29,7 +29,7 @@ public: CommandPtr makeCommandChangeDirectory() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 62eb1cad6ab..c7b2f7cdcac 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -94,6 +94,6 @@ public: CommandPtr makeCommandCopy() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 74707160f67..51aff2ef0af 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -31,7 +31,7 @@ public: CommandPtr 
makeCommandLink() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 535936480d9..c6222f326d4 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -34,7 +34,7 @@ public: CommandPtr makeCommandMkDir() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 40b698c9340..e3d485032e0 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -72,7 +72,7 @@ public: CommandPtr makeCommandMove() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 9f60cca2873..11207578150 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -46,7 +46,7 @@ public: CommandPtr makeCommandRead() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 3b6ad018fb8..ad859dd0310 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -52,7 +52,7 @@ public: CommandPtr makeCommandRemove() { - return std::make_unique(); + return std::make_shared(); } } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp index 9e5d443ebeb..fa02d991365 100644 --- a/programs/disks/CommandSwitchDisk.cpp +++ b/programs/disks/CommandSwitchDisk.cpp @@ -30,6 +30,6 @@ public: CommandPtr makeCommandSwitchDisk() { - return std::make_unique(); + return std::make_shared(); } } From 4d85603e13ba16bdf0eac1b8d45bcf78a88514c7 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 1 Jul 2024 17:56:26 +0000 Subject: [PATCH 218/439] Fix build problems --- src/Coordination/Standalone/Context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index d3bbfececed..971689a9215 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -132,6 +132,7 @@ public: { KEEPER, SERVER, + DISKS }; void setApplicationType(ApplicationType) {} From 447c0db2bc1db2706598a5fe0eb81332b3e4f0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 1 Jul 2024 20:11:19 +0200 Subject: [PATCH 219/439] Fix SettingsChangesHistory 24.7 --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 194a0024f2b..65efc157741 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -58,6 +58,7 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -88,7 +89,6 @@ static std::initializer_list Date: Mon, 1 Jul 2024 21:55:45 +0200 Subject: [PATCH 220/439] Fix --- 
src/Interpreters/Cache/WriteBufferToFileSegment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e654d091561..dd038948adf 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -134,7 +134,7 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() if (file_segment->getDownloadedSize() > 0) return std::make_unique(file_segment->getPath()); else - return std::make_unique(); + return std::make_unique(); } } From 27e0e57054010446a530efc3eb02e85d09f7e9e2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 22:47:36 +0200 Subject: [PATCH 221/439] Use ReadBufferFromFileBase instead of ReadBufferFromFile for reread_buffer_from_file --- src/Storages/MergeTree/MergeTask.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..c8f1a08128b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -555,18 +555,18 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr as argument. ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); From 2991e27183a7dfe0e60d944155759f15123d96a3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 15:26:46 +0200 Subject: [PATCH 222/439] Parse user from URL for dashboard.html (useful for sharing) Signed-off-by: Azat Khuzhin --- programs/server/dashboard.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index b21d4b86314..45f988f7b1e 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -506,6 +506,14 @@ let user = 'default'; let password = ''; let add_http_cors_header = (location.protocol != 'file:'); +const current_url = new URL(window.location); +/// Substitute user name if it's specified in the query string +const user_from_url = current_url.searchParams.get('user'); +if (user_from_url) { + user = user_from_url; +} + + const errorCodeMessageMap = { 516: 'Error authenticating with database. 
Please check your connection params and try again.' } From 93c1b5d8a72559517bafd45cf94e2e531e39a404 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 2 Jul 2024 08:21:51 +0000 Subject: [PATCH 223/439] Address issues pointed out in the PR --- docs/en/sql-reference/functions/bit-functions.md | 6 +++--- src/Functions/FunctionBitTestMany.h | 2 +- src/Functions/bitTest.cpp | 8 ++++---- .../0_stateless/01082_bit_test_out_of_bound.reference | 3 +++ tests/queries/0_stateless/01082_bit_test_out_of_bound.sql | 4 ++++ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index a48893b93bf..5ab7e07fcad 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring). ## bitTest -Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. +Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0. **Syntax** @@ -226,7 +226,7 @@ Result: ## bitTestAll -Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0. The conjuction for bit-wise operations: @@ -289,7 +289,7 @@ Result: ## bitTestAny -Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0. 
The disjunction for bit-wise operations: diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 19ece2ae9e5..950e4ab4ea8 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -16,8 +16,8 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int PARAMETER_OUT_OF_BOUND; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 1223ef7cbbb..cb6b83c1cf1 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -28,10 +28,10 @@ struct BitTestImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else { - typename NumberTraits::ToInteger::Type a_int(a); - typename NumberTraits::ToInteger::Type b_int(b); - const auto max_position = decltype(b)((8 * sizeof(a)) - 1); - if (b > max_position || b < 0) + typename NumberTraits::ToInteger::Type a_int = a; + typename NumberTraits::ToInteger::Type b_int = b; + const auto max_position = static_cast((8 * sizeof(a)) - 1); + if (b_int > max_position || b_int < 0) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "The bit position argument needs to a positive value and less or equal to {} for integer {}", std::to_string(max_position), std::to_string(a_int)); diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index cf12c6b0b1c..26085389381 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -1,3 +1,4 @@ +-- bitTestAny 0 1 1 0 2 1 @@ -6,6 +7,7 @@ 5 0 6 1 7 0 +-- bitTestAll 0 1 1 0 2 1 @@ -14,6 +16,7 @@ 5 0 6 1 7 0 +-- bitTest 0 1 1 0 2 1 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 92ece2a4aa4..e741cb249d0 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,8 +1,12 @@ +SELECT '-- bitTestAny'; SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTestAll'; SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT '-- bitTest'; SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16); From aaffa64cdd34da31009c206a30307e7b5db91155 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 10:30:45 +0200 Subject: [PATCH 224/439] Fix data race for Keeper snapshot queue --- src/Common/ConcurrentBoundedQueue.h | 8 +------- src/Coordination/KeeperDispatcher.cpp | 10 ++-------- src/Coordination/KeeperStateMachine.cpp | 8 +++++--- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- 
a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4389da082d..38893242a2b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -319,19 +319,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if (!snapshot_file_info) continue; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e4d661dfe17..df152bbe0af 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -569,7 +569,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; if (!execute_only_cleanup) { try @@ -599,7 +599,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -612,13 +613,14 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res latest_snapshot_info->path); } } + + ret = true; } catch (...) 
{ ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { From 9e586f0871f67d612cb8c10ec631c4d3ca810237 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 10:50:21 +0200 Subject: [PATCH 225/439] Fixes --- src/Coordination/CoordinationSettings.h | 2 +- src/Coordination/KeeperStateMachine.cpp | 74 +++++++++++++------------ src/Coordination/KeeperStateMachine.h | 8 +-- src/Coordination/KeeperStorage.cpp | 4 +- 4 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index e7ae1f86d2e..6e23a56ef97 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -60,7 +60,7 @@ struct Settings; M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ - M(UInt64, log_slow_cpu_threshold_ms, 5000, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a12d8a50ac3..88f708ab4ae 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -153,14 +153,20 @@ void assertDigest( } } -} - -std::unique_lock KeeperStateMachine::getStorageLock() const +struct TSA_SCOPED_LOCKABLE LockGuardWithStats final { - Stopwatch watch; - std::unique_lock lock(storage_and_responses_lock); - ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; + std::unique_lock lock; + explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex) + { + Stopwatch watch; + std::unique_lock l(mutex); + ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); + lock = std::move(l); + } + + ~LockGuardWithStats() TSA_RELEASE() = default; +}; + } nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) @@ -282,7 +288,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) return true; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); if (storage->isFinalized()) return false; @@ -312,7 +318,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponseForSession response = 
processReconfiguration(request_for_session); if (!responses_queue.push(response)) { @@ -412,7 +418,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } using namespace std::chrono; - uint64_t elapsed = request_for_session->time - duration_cast(system_clock::now().time_since_epoch()).count(); + uint64_t elapsed = duration_cast(system_clock::now().time_since_epoch()).count() - request_for_session->time; if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) { LOG_INFO( @@ -438,7 +444,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n response_for_session.session_id = -1; response_for_session.response = response; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); response->session_id = session_id; @@ -453,7 +459,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n parsed_request_cache.erase(request_for_session->session_id); } - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -504,7 +510,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } { /// deserialize and apply snapshot to storage - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) @@ -556,7 +562,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -583,7 +589,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. 
- auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -645,7 +651,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } { /// Destroy snapshot with lock - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -786,7 +792,7 @@ int KeeperStateMachine::read_logical_snp_obj( void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) @@ -796,97 +802,97 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi void KeeperStateMachine::shutdownStorage() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } std::vector KeeperStateMachine::getDeadSessions() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } int64_t KeeperStateMachine::getNextZxid() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } uint64_t KeeperStateMachine::getLastProcessedZxid() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } uint64_t KeeperStateMachine::getNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } uint64_t KeeperStateMachine::getTotalWatchesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } uint64_t KeeperStateMachine::getWatchedPathsCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getWatchedPathsCount(); } uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + 
LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } uint64_t KeeperStateMachine::getApproximateDataSize() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } uint64_t KeeperStateMachine::getKeyArenaSize() const { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } @@ -927,7 +933,7 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const void KeeperStateMachine::recalculateStorageStats() { - auto lock = getStorageLock(); + LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 5b166e11569..7ea14aa2d30 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -131,8 +131,6 @@ public: void reconfigure(const KeeperStorage::RequestForSession& request_for_session); private: - std::unique_lock getStorageLock() const; - CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -141,7 +139,7 @@ private: nuraft::ptr latest_snapshot_buf = nullptr; /// Main state machine logic - KeeperStoragePtr storage; + KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); /// Save/Load and Serialize/Deserialize logic for snapshots. 
KeeperSnapshotManager snapshot_manager; @@ -184,7 +182,7 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration( - const KeeperStorage::RequestForSession& request_for_session); + KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session) + TSA_REQUIRES(storage_and_responses_lock); }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1e53a664d1b..1542eb0d71a 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -2322,7 +2322,7 @@ void KeeperStorage::preprocessRequest( elapsed_ms, zk_request->toString(/*short_format=*/true)); } - ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed); }); if (!initialized) @@ -2436,7 +2436,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( elapsed_ms, zk_request->toString(/*short_format=*/true)); } - ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed); }); if (!initialized) From a905b24f7585191f2271f77cc4817e11f5b758f0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:54:56 +0200 Subject: [PATCH 226/439] Fix clang-tidy --- src/Processors/Chunk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 8e3ca0b03b3..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::addColumn(), max position = {}", - position, columns.size() ? columns.size() - 1 : 0); + position, !columns.empty() ? columns.size() - 1 : 0); if (empty()) num_rows = column->size(); else if (column->size() != num_rows) @@ -143,7 +143,7 @@ void Chunk::erase(size_t position) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", - toString(position), toString(columns.size() ? columns.size() - 1 : 0)); + toString(position), toString(!columns.empty() ? 
columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } From ee0c4093d461233cfb920a4a224292ea9529393b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 12:03:27 +0200 Subject: [PATCH 227/439] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..7d6499cef5e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -253,7 +253,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From 422b8dea317168ddc4aa4c0e14e0d0350ce0be81 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 2 Jul 2024 12:30:22 +0200 Subject: [PATCH 228/439] Add database_replicated_allow_heavy_create to settings changes --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..70f94fe2ab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list Date: Tue, 2 Jul 2024 12:32:38 +0200 Subject: [PATCH 229/439] Add tests for base64URLEncode and base64URLDecode --- .../03167_base64_url_functions_sh.reference | 0 .../03167_base64_url_functions_sh.sh | 191 ++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100755 tests/queries/0_stateless/03167_base64_url_functions_sh.reference create mode 100755 tests/queries/0_stateless/03167_base64_url_functions_sh.sh diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference new file mode 100755 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh new file mode 100755 index 00000000000..d4f56d8e18c --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -0,0 +1,191 @@ +#!/usr/bin/env bash +# Tags: no-fastest, no-debug +# shellcheck disable=SC2155 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +urls=( + "http://www.example.com" + "https://secure.example.com" + "http://example.com" + "https://www.example.org" + "https://subdomain.example.com" + "http://sub.sub.example.com" + "http://192.168.1.1" + "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]" + "http://example.com:8080" + "https://example.com:443" + "http://example.com/path/to/page.html" + "https://example.com/path/with/trailing/slash/" + "http://example.com/search?q=query&lang=en" + "https://example.com/path?param1=value1¶m2=value2" + "http://example.com/page.html#section1" + "https://example.com/document.pdf#page=10" + "http://user:password@example.com" + "https://user@example.com" + "https://user:pass@sub.example.com:8080/path/page.html?query=123#fragment" + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=encode+this" + "http://例子.测试" + "https://mañana.com" + "http://example.com/%E2%82%AC" + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" + "file:///C:/path/to/file.txt" + "file:///home/user/document.pdf" + "ftp://ftp.example.com/pub/file.zip" + "ftps://secure-ftp.example.com/private/doc.pdf" + "mailto:user@example.com" + "mailto:user@example.com?subject=Hello&body=How%20are%20you" + "git://github.com/user/repo.git" + "ssh://user@host.xz:port/path/to/repo.git" + "https://example.com/path(1)/[2]/{3}" + "http://example.com/path;param?query,value" + "" + "http://" + "example.com" + "http:" + "//" + "?query=value" + "#fragment" + "http://?#" + "http://xn--bcher-kva.ch" + "https://xn--bcher-kva.xn--tckwe/xn--8ws00zhy3a/%E6%B8%AC%E8%A9%A6.php?xn--o39an51a5phao35a=xn--mgbh0fb&xn--fiq228c5hs=test" + "https://xn--3e0b707e.xn--79-8kcre8v3a/%ED%85%8C%EC%8A%A4%ED%8A%B8/%ED%8C%8C%EC%9D%BC.jsp?xn--i1b6b1a6a2e=xn--9t4b11yi5a&xn--3e0b707e=xn--80aaa1cbgbm" + "https://example.com/path?param=value&special=!@#$%^&*()" + + "http://example.com/path/with/~tilde" + "https://example.com/path/with/\`backtick\`" + + "https://example.com/path?param1=value1¶m2=value2¶m3=value3#section1#section2" + "http://example.com/page?q1=v1&q2=v2#frag1#frag2#frag3" + + "https://example.com/☃/snowman" + "http://example.com/path/⽇本語" + "https://example.com/ü/ñ/path?q=ç" + + "https://example.com/path/to/very/long/url/that/exceeds/two/hundred/and/fifty/five/characters/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam/quis/nostrud/exercitation/ullamco/laboris/nisi/ut/aliquip/ex/ea/commodo/consequat" + + "https://example.com//path///to//file" + "http://example.com/path?param1=value1&¶m2=value2&&¶m3=value3" + + "http://example.com/%70%61%74%68?%70%61%72%61%6d=%76%61%6c%75%65#%66%72%61%67%6d%65%6e%74" + + "HtTpS://ExAmPlE.cOm/PaTh" + "http://EXAMPLE.COM/PATH" + + "http://127.0.0.1:8080/path" + "https://[::1]/path" + "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/path" + + "http://example.com:65535/path" + "https://example.com:0/path" + + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg==" + + "https://user:password@example.com:8080/path?query=value#fragment" + "ftp://anonymous:password@ftp.example.com/pub/" + + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=query%20with%20spaces" + + "https://www.mañana.com/path" + "http://例子.测试/path" + "https://рм.рф/path" + + "https://user:pass@sub.example.com:8080/p/a/t/h?query=123&key=value#fragid1" + + "jdbc:mysql://localhost:3306/database" + "market://details?id=com.example.app" + 
"tel:+1-816-555-1212" + "sms:+18165551212" + + "http://[1080:0:0:0:8:800:200C:417A]/index.html" + "https://[2001:db8::1428:57ab]:8080/path" + + "http://.." + "http://../" + "http://??" + "http://??/" + "http:///a" + "http://example.com??" + "http://example.com??/" + "foo://example.com:8042/over/there?name=ferret#nose" + "//example.com/path" +) + + +base64URLEncode() { + echo -n "$1" | openssl base64 -e -A | tr '+/' '-_' | tr -d '=' +} + +base64URLDecode() { + local len=$((${#1} % 4)) + local result="$1" + if [ $len -eq 2 ]; then result="$1"'==' + elif [ $len -eq 3 ]; then result="$1"'=' + fi + echo "$result" | tr '_-' '/+' | openssl base64 -d -A +} + +test_compare_to_gold_encode() { + local input="$1" + local encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_gold=$(base64URLEncode $input) + + if [ "$encode" != "$encode_gold" ]; then + echo "Input: $input" + echo "Expected: $encode_gold" + echo "Got: $encode" + fi +} + +test_compare_to_gold_decode() { + local input="$1" + local encode_gold=$(base64URLEncode $input) + local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_gold=$(base64URLDecode $encode_gold) + + if [ "$decode" != "$decode_gold" ]; then + echo "Input: $input" + echo "Expected: $decode_gold" + echo "Got: $decode" + fi +} + +test_compare_to_self() { + local input="$1" + local encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode')") + + if [ "$decode" != "$input" ]; then + echo "Input: $input" + echo "Encode: $encode" + echo "Got: $decode" + fi +} + +for url in "${urls[@]}"; do + test_compare_to_gold_encode "$url" +done + +for url in "${urls[@]}"; do + test_compare_to_gold_decode "$url" +done + +for url in "${urls[@]}"; do + test_compare_to_self "$url" +done + +# special case for ' +encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('http://example.com/!$&\'()*+,;=:@/path')") +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode')") +if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then + echo "Special case fail" + echo "Encode: $encode" + echo "Got: $decode" +fi From baa4d50067bb9fdbfaee4d41df9ba531fc1bbfd7 Mon Sep 17 00:00:00 2001 From: Mikhail Gorshkov Date: Tue, 2 Jul 2024 10:51:58 +0000 Subject: [PATCH 230/439] Fix support of non-const scale arguments in power function --- src/Functions/FunctionsRound.h | 171 ++++++++++++++---- .../03165_round_scale_as_column.reference | 13 ++ .../03165_round_scale_as_column.sql | 3 +- 3 files changed, 152 insertions(+), 35 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..d43f7f264b4 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -518,39 +518,105 @@ struct Dispatcher template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(value_col_typed.getData().size()); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + auto col_res = ColumnVector::create(); + + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(value_col_typed->getData().size()); + + if (!vec_res.empty()) + { + // 
Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) + { + size_t scale = 1; + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else if (scale_arg > 0) + { + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else + { + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & value_data = value_col_typed->getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + } + } + } + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); if (scale_col == nullptr || isColumnConst(*scale_col)) { + vec_res.resize(1); auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); if (scale_arg == 0) { size_t scale = 1; - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else if (scale_arg > 0) { size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else { size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } } /// Non-const scale argument: else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { - const auto & value_data = value_col_typed.getData(); const auto & scale_data = scale_col_typed->getData(); - const size_t rows = value_data.size(); + const size_t rows = scale_data.size(); + + vec_res.resize(rows); for (size_t i = 0; i < rows; ++i) { @@ -561,23 +627,23 @@ struct Dispatcher if (raw_scale == 0) { size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else if (raw_scale > 0) { size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else { size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } } } + return col_res; } - - return col_res; + return nullptr; } }; @@ -589,24 +655,64 @@ public: template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - const typename ColumnDecimal::Container & vec_src = value_col_typed.getData(); - - auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); - auto & vec_res = col_res->getData(); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + const typename ColumnDecimal::Container & vec_src = value_col_typed->getData(); + + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed->getScale()); + auto & vec_res = col_res->getData(); + vec_res.resize(vec_src.size()); + + if (!vec_res.empty()) + { + /// Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg); + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } + } + } + + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: if (scale_col == nullptr || isColumnConst(*scale_col)) { + auto col_res = ColumnDecimal::create(1, col->getScale()); auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); + return col_res; } - /// Non-const scale argument - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + /// Non-const scale argument: + if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { const auto & scale = scale_col_typed->getData(); - const size_t rows = vec_src.size(); + const size_t rows = scale.size(); + auto col_res = ColumnDecimal::create(rows, col->getScale()); for (size_t i = 0; i < rows; ++i) { @@ -614,13 +720,13 @@ public: validateScale(scale64); Scale raw_scale = scale64; - DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } + return col_res; } } - - return col_res; + return nullptr; } }; @@ -671,9 +777,6 @@ public: using ScaleTypes = std::decay_t; using ScaleType = typename ScaleTypes::RightType; - if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); - res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); return true; }; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference index 9ad25ed466a..e0c9b6959ee 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.reference +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -2162,4 +2162,17 @@ CHECKPOINT2 10 1.6275 1.6275 1.6275 1.6275 1 1 +3 +3.1 +3.14 +3.142 +3.1416 +3.14159 +3.141593 +3.1415927 +3.14159265 +3.141592654 +42 +42.4 +42.42 1 diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql index 229f705808d..adae36564b8 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.sql +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -118,6 +118,7 @@ DROP TABLE tab; SELECT round(1, 1); SELECT round(materialize(1), materialize(1)); -SELECT round(1, materialize(1)); 
--{serverError ILLEGAL_COLUMN} +SELECT round(pi(), number) FROM numbers(10); +SELECT round(toDecimal32(42.42, 2), number) from numbers(3); SELECT round(materialize(1), 1); SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} From 2ce564daa0a7eb98b46dbed50d2eddfff8a731c2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 13:05:17 +0200 Subject: [PATCH 231/439] Make 01006_simpod_empty_part_single_column_write.sh always use vertical merge --- .../0_stateless/01006_simpod_empty_part_single_column_write.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " From f2c06becd5fb64d1075a7a327d990f44eeb12b2d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 13:17:28 +0200 Subject: [PATCH 232/439] Fix race in s3queue --- src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 955e49bc2bf..1939ea0a66f 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -111,10 +111,12 @@ void ObjectStorageQueueSource::FileIterator::returnForRetry(Source::ObjectInfoPt if (metadata->useBucketsForProcessing()) { const auto bucket = metadata->getBucketForPath(object_info->relative_path); + std::lock_guard lock(mutex); listed_keys_cache[bucket].keys.emplace_front(object_info); } else { + std::lock_guard lock(mutex); objects_to_retry.push_back(object_info); } } From 94dba17ad92d6982e5e681113e97eb39099ff696 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 13:26:52 +0200 Subject: [PATCH 233/439] Fix test --- tests/integration/test_keeper_four_word_command/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index a3a059c1dcb..83503122729 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -301,7 +301,7 @@ def test_cmd_conf(started_cluster): assert result["disk_move_retries_during_init"] == "100" assert result["log_slow_total_threshold_ms"] == "5000" - assert result["log_slow_cpu_threshold_ms"] == "5000" + assert result["log_slow_cpu_threshold_ms"] == "100" assert result["log_slow_connection_operation_threshold_ms"] == "1000" finally: close_keeper_socket(client) From c4da270ddb67e0710e35e0ed022b91280d30dc70 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Jul 2024 14:01:01 +0200 Subject: [PATCH 234/439] remove no-fastest, no-debug --- 
tests/queries/0_stateless/03167_base64_url_functions_sh.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh index d4f56d8e18c..12eea7462df 100755 --- a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-fastest, no-debug # shellcheck disable=SC2155 set -e From a2626037bc6ed631758c364edcc096c983805b0c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:15:59 +0200 Subject: [PATCH 235/439] Improve object storage tags in tests --- docker/test/stateful/run.sh | 4 ++++ docker/test/stateless/run.sh | 2 +- docker/test/stress/run.sh | 1 - tests/ci/stress_check.py | 3 +++ tests/clickhouse-test | 18 +++++++++++++++--- ...6_replace_partition_from_table_zookeeper.sh | 2 +- .../00632_get_sample_block_cache.sql | 2 +- ...0731_long_merge_tree_select_opened_files.sh | 2 +- ...3_system_columns_and_system_tables_long.sql | 2 +- .../0_stateless/00763_lock_buffer_long.sh | 2 +- .../01070_mutations_with_dependencies.sql | 2 +- .../01078_merge_tree_read_one_thread.sql | 2 +- .../01200_mutations_memory_consumption.sql | 2 +- .../0_stateless/01221_system_settings.sql | 2 +- .../0_stateless/01275_parallel_mv.sql.j2 | 4 ++-- .../01281_group_by_limit_memory_tracking.sh | 2 +- .../0_stateless/01293_optimize_final_force.sh | 2 +- .../0_stateless/01304_direct_io_long.sh | 2 +- .../01343_min_bytes_to_use_mmap_io.sql | 2 +- .../01344_min_bytes_to_use_mmap_io_index.sql | 2 +- .../0_stateless/01475_read_subcolumns.sql | 2 +- .../01475_read_subcolumns_storages.sh | 2 +- ...ce_condition_rename_clear_zookeeper_long.sh | 2 +- ...2_execute_merges_on_single_replica_long.sql | 2 +- .../0_stateless/01533_multiple_nested.sql | 2 +- .../01551_mergetree_read_in_order_spread.sql | 2 +- ...1605_adaptive_granularity_block_borders.sql | 4 ++-- .../01643_merge_tree_fsync_smoke.sql | 2 +- ...01643_replicated_merge_tree_fsync_smoke.sql | 2 +- ...5_normalize_create_alter_function_names.sql | 2 +- .../01810_max_part_removal_threads_long.sh | 2 +- .../02226_filesystem_cache_profile_events.sh | 2 +- .../02228_merge_tree_insert_memory_usage.sql | 4 ++-- ...33_optimize_aggregation_in_order_prefix.sql | 2 +- ...filesystem_cache_bypass_cache_threshold.sql | 2 +- .../02240_filesystem_query_cache.sql | 2 +- .../02240_system_filesystem_cache_table.sh | 2 +- ...241_filesystem_cache_on_write_operations.sh | 2 +- .../02242_system_filesystem_cache_log_table.sh | 2 +- .../0_stateless/02263_lazy_mark_load.sh | 2 +- .../0_stateless/02286_drop_filesystem_cache.sh | 2 +- .../02313_filesystem_cache_seeks.sh | 2 +- .../0_stateless/02336_sparse_columns_s3.sql | 2 +- .../0_stateless/02343_aggregation_pipeline.sql | 2 +- ..._with_external_aggregation_memory_usage.sql | 2 +- .../0_stateless/02361_fsync_profile_events.sh | 4 ++-- .../02381_client_prints_server_side_time.sh | 2 +- .../02454_create_table_with_custom_disk.sql | 2 +- .../02497_trace_events_stress_long.sh | 2 +- ...3_cache_on_write_with_small_segment_size.sh | 2 +- .../02521_aggregation_by_partitions.sql | 2 +- .../0_stateless/02532_send_logs_level_test.sh | 4 ++-- ...4_fix_grouping_sets_predicate_push_down.sql | 2 +- .../02560_vertical_merge_memory_usage.sql | 2 +- .../02582_async_reading_with_small_limit.sql | 2 +- .../02703_max_local_read_bandwidth.sh | 2 +- .../02703_max_local_write_bandwidth.sh | 2 +- .../0_stateless/02704_max_backup_bandwidth.sh | 2 
+- .../0_stateless/02725_memory-for-merges.sql | 2 +- .../02731_zero_objects_in_metadata.sh | 2 +- ...stem_parts_columns_modification_time.sql.j2 | 4 ++-- .../02808_filesystem_cache_drop_query.sh | 2 +- .../02833_multiprewhere_extra_column.sql | 2 +- ...artition_with_duplicated_parts_zookeeper.sh | 2 +- ...933_change_cache_setting_without_restart.sh | 2 +- ...dynamically_change_filesystem_cache_size.sh | 2 +- .../03008_local_plain_rewritable.sh | 2 +- ...32_dynamically_resize_filesystem_cache_2.sh | 2 +- 68 files changed, 92 insertions(+), 74 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 09a9f51084b..2215ac2b37c 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -213,6 +213,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--azure-blob-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..b56394df97a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -207,7 +207,7 @@ function run_tests() if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # to disable the same tests - ADDITIONAL_OPTIONS+=('--s3-storage') + ADDITIONAL_OPTIONS+=('--azure-blob-storage') # azurite is slow, but with these two settings it can be super slow ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6d121ba4142..96f8ecb2fab 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -211,7 +211,6 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER" stop_server # Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index bf0281cae68..486bfc25e22 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -30,6 +30,9 @@ def get_additional_envs(check_name: str) -> List[str]: if "azure" in check_name: result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") + if "s3" in check_name: + result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + return result diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..c581d35a289 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -700,7 +700,9 @@ class FailureReason(enum.Enum): NO_LONG = "not running long tests" REPLICATED_DB = "replicated-database" NON_ATOMIC_DB = "database engine not Atomic" + OBJECT_STORAGE = "object-storage" S3_STORAGE = "s3-storage" + AZURE_BLOB_STORAGE = "azure-blob-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" @@ -1226,13 +1228,17 @@ class TestCase: elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: + return FailureReason.AZURE_BLOB_STORAGE + elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + return FailureReason.OBJECT_STORAGE elif ( tags - 
and "no-s3-storage-with-slow-build" in tags - and args.s3_storage + and "no-object-storage-with-slow-build" in tags + and (args.s3_storage or args.azure_blob_storage) and BuildFlags.RELEASE not in args.build_flags ): - return FailureReason.S3_STORAGE + return FailureReason.OBJECT_STORAGE elif tags: for build_flag in args.build_flags: @@ -3099,6 +3105,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--azure-blob-storage", + action="store_true", + default=False, + help="Run tests over azure blob storage", + ) parser.add_argument( "--no-random-settings", action="store_true", diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..13146f2eab0 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index c54ca0b084e..ae9b6bb7b2c 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-asan +-- Tags: long, no-object-storage, no-asan SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index af746c43da9..5a4fd901f8d 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-tsan +# Tags: long, no-object-storage, no-tsan # no-s3 because read FileOpen metric set -e diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 4613576cf4e..009fc0bbb9f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-random-merge-tree-settings +-- Tags: long, no-object-storage, no-random-merge-tree-settings SET output_format_pretty_row_numbers = 0; DROP TABLE IF EXISTS check_system_tables; diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 046e4efaa85..2006d43cdd2 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Tags: long, no-object-storage, no-msan, no-asan, no-tsan, no-debug # Some kind of stress test, it doesn't make sense to test in a non-release build set -e diff --git a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 813ebf3f5a7..4d1cd54306c 
100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; diff --git a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql index 3a05e4507a2..166f44df2a7 100644 --- a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql +++ b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- Output slightly different plan drop table if exists t; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 5019abc38ab..f2d071961ee 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: no-debug, no-parallel, long, no-object-storage, no-random-settings, no-random-merge-tree-settings SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01221_system_settings.sql b/tests/queries/0_stateless/01221_system_settings.sql index fcffd6c45fe..da0204b37bd 100644 --- a/tests/queries/0_stateless/01221_system_settings.sql +++ b/tests/queries/0_stateless/01221_system_settings.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage select * from system.settings where name = 'send_timeout'; select * from system.merge_tree_settings order by length(description) limit 1; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 047b1cc3ee7..9d74474c1a4 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage, no-parallel, no-fasttest --- no-s3-storage: s3 has 20 more threads +-- Tags: no-object-storage, no-parallel, no-fasttest +-- no-object-storage: s3 has 20 more threads -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index e83e49dffef..33b8f413fd5 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-object-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index d3d3d3e1ac5..e838af8af9b 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ 
b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-object-storage # This test is too slow with S3 storage and debug modes. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 97148dc268e..2e27c2f7728 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 614629351ef..15c9ec16700 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01343; CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01343 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 2e5ec563641..76cb535dcb7 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01344; CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01344 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 8d4e3cb779b..d6eec2f84a1 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 5a30f9e0f08..f74f6755e59 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index c3c87eeaf8b..6098c826e32 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-s3-storage +# Tags: race, zookeeper, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 49ef9d8b79f..e53f4476ec6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -1,4 +1,4 @@ --- Tags: long, replica, no-replicated-database, no-parallel, no-s3-storage +-- Tags: long, replica, no-replicated-database, no-parallel, no-object-storage -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 1a6f0ec395e..80e9fc7e2fb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 95b46c69e83..b5ece08196e 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 187ff5c37e1..9b96ce3e586 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,6 +1,6 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-s3-storage +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage -- no-tsan: too slow --- no-s3-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher +-- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index dfc761e1764..f7622bcf98f 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists data_01643; diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index 54c30fa2b1a..992cc687c88 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- no-parallel -- for flaky check and to avoid "Removing leftovers from table" (for other tables) -- Temporarily skip warning 'table was created by another server at the same moment, will retry' diff --git 
a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index be0f7e8b710..921d28e6399 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-replicated-database, no-parallel, no-s3-storage +-- Tags: zookeeper, no-replicated-database, no-parallel, no-object-storage drop table if exists x; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 3782a7d3ad6..c38fc505fa8 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-object-storage # Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 9d87542d84d..d0e61541b15 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-replicated-database +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings, no-replicated-database # set -x diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index ca1ee2738c7..6d86d995143 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,5 +1,5 @@ --- Tags: long, no-parallel, no-s3-storage --- no-s3-storage: Avoid flakiness due to cache / buffer usage +-- Tags: long, no-parallel, no-object-storage +-- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql index 8bc75040e5a..48af5ae0031 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index ee92931ec54..b791ee18e82 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git 
a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index a609702f22a..40c80e04697 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 57b8cec7864..8faf0a08f1f 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 1028fba76f5..f8e7b7e7e72 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 7a665d81eab..fe016f5a27f 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index 5f80d9d7f6d..f1602e47e01 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-parallel +# Tags: no-object-storage, no-random-settings, no-parallel set -eo pipefail CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a2c9352b7aa..32c9e9cb060 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index fbaec1ffaa7..b54e3d7f805 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x 
diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index bf4622adedc..1dc1e980846 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest, no-object-storage DROP TABLE IF EXISTS t_sparse_s3; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d73ac66763e..0f9dbd0247d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- produces different pipeline if enabled set enable_memory_bound_merging_of_aggregation_results = 0; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index a5a3da82324..105fb500461 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +-- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index e150d70b896..98c9cf9b7b4 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-merge-tree-settings -# Tag no-s3-storage: s3 does not have fsync +# Tags: no-object-storage, no-random-merge-tree-settings +# Tag no-object-storage: s3 does not have fsync CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh index e6cd63da95d..81376ee3791 100755 --- a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh +++ b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..a2d46cf6d1b 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-replicated-database +-- Tags: no-object-storage, no-replicated-database DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index c111ed40a29..dfd2f12b55b 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -1,5 +1,5 
@@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-s3-storage, no-fasttest, no-replicated-database +# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-object-storage, no-fasttest, no-replicated-database set -e diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 4f3fd0e54f6..5aeab4c746e 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +# Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 55723360c38..b4d31e234d8 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..71f42e2a6db 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-debug -# - no-s3-storage - S3 has additional logging +# Tags: no-object-storage, no-debug +# - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql index 0891f1aa8a2..f926b9037d2 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_grouping_sets_predicate; diff --git a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql index 785fb10f70b..361305bac6d 100644 --- a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql +++ b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists tvm; create table tvm (c0 UInt64, c1 UInt64, c2 UInt64, c3 UInt64, c4 UInt64, c5 UInt64, c6 UInt64, c7 UInt64, c8 UInt64, c9 UInt64, c10 UInt64, c11 UInt64, c12 UInt64, c13 UInt64, c14 UInt64, c15 UInt64, c16 UInt64, c17 UInt64, c18 UInt64, c19 UInt64, c20 UInt64, c21 UInt64, c22 UInt64, c23 UInt64, c24 UInt64, c25 UInt64, c26 UInt64, c27 UInt64, c28 UInt64, c29 UInt64, c30 UInt64, c31 UInt64, c32 UInt64, c33 UInt64, c34 UInt64, c35 UInt64, c36 UInt64, c37 UInt64, c38 UInt64, c39 UInt64, c40 UInt64, c41 UInt64, c42 UInt64, c43 UInt64, c44 UInt64, c45 UInt64, c46 UInt64, c47 UInt64, c48 UInt64, c49 UInt64, c50 UInt64, c51 UInt64, c52 UInt64, c53 UInt64, c54 UInt64, c55 UInt64, c56 UInt64, c57 UInt64, c58 UInt64, c59 UInt64, c60 
UInt64, c61 UInt64, c62 UInt64, c63 UInt64, c64 UInt64, c65 UInt64, c66 UInt64, c67 UInt64, c68 UInt64, c69 UInt64, c70 UInt64, c71 UInt64, c72 UInt64, c73 UInt64, c74 UInt64, c75 UInt64, c76 UInt64, c77 UInt64, c78 UInt64, c79 UInt64, c80 UInt64, c81 UInt64, c82 UInt64, c83 UInt64, c84 UInt64, c85 UInt64, c86 UInt64, c87 UInt64, c88 UInt64, c89 UInt64, c90 UInt64, c91 UInt64, c92 UInt64, c93 UInt64, c94 UInt64, c95 UInt64, c96 UInt64, c97 UInt64, c98 UInt64, c99 UInt64, c100 UInt64, c101 UInt64, c102 UInt64, c103 UInt64, c104 UInt64, c105 UInt64, c106 UInt64, c107 UInt64, c108 UInt64, c109 UInt64, c110 UInt64, c111 UInt64, c112 UInt64, c113 UInt64, c114 UInt64, c115 UInt64, c116 UInt64, c117 UInt64, c118 UInt64, c119 UInt64, c120 UInt64, c121 UInt64, c122 UInt64, c123 UInt64, c124 UInt64, c125 UInt64, c126 UInt64, c127 UInt64, c128 UInt64, c129 UInt64, c130 UInt64, c131 UInt64, c132 UInt64, c133 UInt64, c134 UInt64, c135 UInt64, c136 UInt64, c137 UInt64, c138 UInt64, c139 UInt64, c140 UInt64, c141 UInt64, c142 UInt64, c143 UInt64, c144 UInt64, c145 UInt64, c146 UInt64, c147 UInt64, c148 UInt64, c149 UInt64, c150 UInt64, c151 UInt64, c152 UInt64, c153 UInt64, c154 UInt64, c155 UInt64, c156 UInt64, c157 UInt64, c158 UInt64, c159 UInt64, c160 UInt64, c161 UInt64, c162 UInt64, c163 UInt64, c164 UInt64, c165 UInt64, c166 UInt64, c167 UInt64, c168 UInt64, c169 UInt64, c170 UInt64, c171 UInt64, c172 UInt64, c173 UInt64, c174 UInt64, c175 UInt64, c176 UInt64, c177 UInt64, c178 UInt64, c179 UInt64, c180 UInt64, c181 UInt64, c182 UInt64, c183 UInt64, c184 UInt64, c185 UInt64, c186 UInt64, c187 UInt64, c188 UInt64, c189 UInt64, c190 UInt64, c191 UInt64, c192 UInt64, c193 UInt64, c194 UInt64, c195 UInt64, c196 UInt64, c197 UInt64, c198 UInt64, c199 UInt64, c200 UInt64, c201 UInt64, c202 UInt64, c203 UInt64, c204 UInt64, c205 UInt64, c206 UInt64, c207 UInt64, c208 UInt64, c209 UInt64, c210 UInt64, c211 UInt64, c212 UInt64, c213 UInt64, c214 UInt64, c215 UInt64, c216 UInt64, c217 UInt64, c218 UInt64, c219 UInt64, c220 UInt64, c221 UInt64, c222 UInt64, c223 UInt64, c224 UInt64, c225 UInt64, c226 UInt64, c227 UInt64, c228 UInt64, c229 UInt64, c230 UInt64, c231 UInt64, c232 UInt64, c233 UInt64, c234 UInt64, c235 UInt64, c236 UInt64, c237 UInt64, c238 UInt64, c239 UInt64, c240 UInt64, c241 UInt64, c242 UInt64, c243 UInt64, c244 UInt64, c245 UInt64, c246 UInt64, c247 UInt64, c248 UInt64, c249 UInt64, c250 UInt64, c251 UInt64, c252 UInt64, c253 UInt64, c254 UInt64, c255 UInt64, c256 UInt64, c257 UInt64, c258 UInt64, c259 UInt64, c260 UInt64, c261 UInt64, c262 UInt64, c263 UInt64, c264 UInt64, c265 UInt64, c266 UInt64, c267 UInt64, c268 UInt64, c269 UInt64, c270 UInt64, c271 UInt64, c272 UInt64, c273 UInt64, c274 UInt64, c275 UInt64, c276 UInt64, c277 UInt64, c278 UInt64, c279 UInt64, c280 UInt64, c281 UInt64, c282 UInt64, c283 UInt64, c284 UInt64, c285 UInt64, c286 UInt64, c287 UInt64, c288 UInt64, c289 UInt64, c290 UInt64, c291 UInt64, c292 UInt64, c293 UInt64, c294 UInt64, c295 UInt64, c296 UInt64, c297 UInt64, c298 UInt64, c299 UInt64) engine = MergeTree order by tuple() settings min_rows_for_wide_part = 10, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1; diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index cb6b1b6083e..406cab82183 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ 
b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index c78cd202f1b..6f43c1ae869 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 31cf6e9606e..4f6a300c5b3 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 748bf856deb..8cb03a93a7a 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 1a8402dff4b..8e4d4f5b3e0 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index eef52002e36..78659b70129 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 index eee236ff681..1ca5cc0bb7e 100644 --- a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage --- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) +-- Tags: no-object-storage +-- Tag: no-object-storage because 
S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) set mutations_sync=1; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index b44f9e50513..8a4a2e906b0 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index 3a751294cba..da2f050cf38 100644 --- a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-object-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh index edfed206d87..07d2ee27d22 100755 --- a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index ddad7a1904b..76ada756f47 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 2e344a6b6e5..6f454da40da 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 5fac964a219..d51e180efc9 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: 
no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tags: no-random-settings, no-object-storage, no-replicated-database, no-shared-merge-tree # Tag no-random-settings: enable after root causing flakiness CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 526c4f84030..09bdd7f6b56 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From ebacab6c986fd4bbd98ebf4761f082948d22b3cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:17:58 +0200 Subject: [PATCH 236/439] Bump From 452201caf943451f15cc14fb6dfb21de33166376 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:21:39 +0200 Subject: [PATCH 237/439] Black --- tests/clickhouse-test | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c581d35a289..8486e3a885f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1230,7 +1230,11 @@ class TestCase: return FailureReason.S3_STORAGE elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: return FailureReason.AZURE_BLOB_STORAGE - elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + elif ( + tags + and ("no-object-storage" in tags) + and (args.azure_blob_storage or args.s3_storage) + ): return FailureReason.OBJECT_STORAGE elif ( tags From 70a2061c9bccd85f6c939529609051c06042c563 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 12:27:01 +0000 Subject: [PATCH 238/439] Fixed bug and added test --- src/Disks/ObjectStorages/IObjectStorage.h | 1 + .../StorageObjectStorageSource.cpp | 41 ++++--- .../StorageObjectStorageSource.h | 6 +- .../03036_reading_s3_archives.reference | 104 +++++++++--------- .../0_stateless/03036_reading_s3_archives.sql | 30 ++--- 5 files changed, 96 insertions(+), 86 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 9f5c14fdb7c..6410a9a7a73 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..9436e729683 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -196,13 +196,12 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, 
read_from_format_info.requested_virtual_columns, - { - .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified - }); + chunk, + read_from_format_info.requested_virtual_columns, + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); return chunk; } @@ -690,10 +689,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -732,6 +730,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -756,6 +755,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -769,15 +770,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..2cbe8a9776c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -259,7 +259,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -278,9 +279,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference 
index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv 
test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 00d7cc25e1a..43bda4ee704 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, 
_file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, 
_path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) From f2244853164906cbf6faf186b7f3782ccc504d5a Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 13:01:33 +0000 Subject: [PATCH 239/439] Add reference to documentation --- docs/en/sql-reference/table-functions/s3.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 1a7e2b8d66a..35e5d86034c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -269,9 +269,9 @@ FROM s3( ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. Type: `LowCardinalty(String)`. -- `_file` — Name of the file. Type: `LowCardinalty(String)`. -- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_path` — Path to the file. Type: `LowCardinalty(String)`. In case of archive, shows path in a format: "{path_to_archive}::{path_to_file_inside_archive}" +- `_file` — Name of the file. Type: `LowCardinalty(String)`. In case of archive shows name of the file inside the archive. +- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
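As a hedged illustration of the archive-aware virtual columns documented above, a query in the spirit of the 03036 test could look like the sketch below. The `s3_conn` named collection and the archive/file names are reused from that test; the concrete `_path`, `_file` and `_size` values depend on the actual objects in the bucket.

-- Illustrative sketch only (assumes the 03036 test fixtures exist):
--   _path is reported as "{path_to_archive}::{path_to_file_inside_archive}"
--   _file is the name of the file inside the archive
--   _size is the uncompressed size of the file inside the archive
SELECT id, data, _path, _file, _size
FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv')
ORDER BY (id, _file, _path);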
## Storage Settings {#storage-settings} From 06e235024f7b33c21be1ef2dc6210b40aabe7921 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:16:57 +0200 Subject: [PATCH 240/439] work with review --- .../Transforms/ApplySquashingTransform.h | 2 +- .../DeduplicationTokenTransforms.cpp | 29 ++++++++----------- .../Transforms/DeduplicationTokenTransforms.h | 19 ++++++------ .../Transforms/SquashingTransform.cpp | 8 ++--- .../Transforms/buildPushingToViewsChain.cpp | 16 +++++----- src/Server/TCPHandler.cpp | 4 +-- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 ++++--- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++-- src/Storages/MergeTree/MutateTask.cpp | 4 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 5 ++-- src/Storages/WindowView/StorageWindowView.cpp | 6 ++-- 12 files changed, 55 insertions(+), 56 deletions(-) diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 94b890198d4..49a6581e685 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -32,7 +32,7 @@ public: protected: void onConsume(Chunk chunk) override { - cur_chunk = DB::Squashing::squash(std::move(chunk)); + cur_chunk = Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 374a6495f79..f50e69e730f 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -56,7 +56,7 @@ String TokenInfo::debugToken() const void TokenInfo::addChunkHash(String part) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; if (stage != DEFINE_SOURCE_WITH_HASHES) @@ -65,7 +65,7 @@ void TokenInfo::addChunkHash(String part) addTokenPart(std::move(part)); } -void TokenInfo::defineSourceWithChunkHashes() +void TokenInfo::finishChunkHashes() { if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; @@ -78,7 +78,7 @@ void TokenInfo::defineSourceWithChunkHashes() void TokenInfo::setUserToken(const String & token) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_USER_TOKEN; if (stage != DEFINE_SOURCE_USER_TOKEN) @@ -87,7 +87,7 @@ void TokenInfo::setUserToken(const String & token) addTokenPart(fmt::format("user-token-{}", token)); } -void TokenInfo::defineSourceWithUserToken(size_t block_number) +void TokenInfo::setSourceWithUserToken(size_t block_number) { if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -108,7 +108,7 @@ void TokenInfo::setViewID(const String & id) addTokenPart(fmt::format("view-id-{}", id)); } -void TokenInfo::defineViewID(size_t block_number) +void TokenInfo::setViewBlockNumber(size_t block_number) { if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -138,6 +138,7 @@ size_t TokenInfo::getTotalSize() const for (const auto & part : parts) size += part.size(); + // we reserve more size here to be able to add delimenter between parts. 
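    // Hedged editorial sketch, not part of the patch: if the accumulated parts were,
    // for example, {"user-token-abc", "view-id-mv1"} (hypothetical values of the kind
    // built by setUserToken() and setViewID()), the full token is the parts joined by
    // a one-byte delimiter, so the reserved size is the sum of the part sizes plus
    // (parts.size() - 1) delimiter bytes, matching the return expression below.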
return size + parts.size() - 1; } @@ -149,17 +150,11 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - if (!must_be_present) - { - LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); - return; - } - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif -String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -170,20 +165,20 @@ String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) } -void SetInitialTokenTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); if (token_info->isDefined()) return; token_info->addChunkHash(getChunkHash(chunk)); - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -203,7 +198,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->defineSourceWithUserToken(block_number++); + token_info->setSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -223,7 +218,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->defineViewID(block_number++); + token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 9d087536a38..79d168d1000 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -42,13 +42,13 @@ namespace DeduplicationToken bool isDefined() const { return stage == DEFINED; } void addChunkHash(String part); - void defineSourceWithChunkHashes(); + void finishChunkHashes(); void setUserToken(const String & token); - void defineSourceWithUserToken(size_t block_number); + void setSourceWithUserToken(size_t block_number); void setViewID(const String & id); - void defineViewID(size_t block_number); + void setViewBlockNumber(size_t block_number); void reset(); @@ -98,10 +98,9 @@ namespace DeduplicationToken class CheckTokenTransform : public ISimpleTransform { public: - CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + CheckTokenTransform(String debug_, const Block & header_) : ISimpleTransform(header_, header_, true) , debug(std::move(debug_)) - , must_be_present(must_be_present_) { } @@ -112,7 +111,6 @@ namespace DeduplicationToken private: String debug; LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - bool must_be_present = false; }; #endif @@ -134,16 +132,19 @@ namespace DeduplicationToken }; - class SetInitialTokenTransform : public ISimpleTransform + class 
DefineSourceWithChunkHashesTransform : public ISimpleTransform { public: - explicit SetInitialTokenTransform(const Block & header_) + explicit DefineSourceWithChunkHashesTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with heshes from the parts. + // But if there is some table with different engine, we still need to define the source of the data in deduplication token + // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; static String getChunkHash(const Chunk & chunk); diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index e457a262681..1fb4433240a 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,7 +18,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = DB::Squashing::squash(squashing.flush()); + finish_chunk = Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -63,14 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + chunk = Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = DB::Squashing::squash(squashing.flush()); + chunk = Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 713ab25600f..8d38396ecd5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -357,7 +357,7 @@ std::optional generateViewChain( } #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", out.getInputHeader())); #endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); @@ -403,7 +403,7 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); #endif auto executing_inner_query = std::make_shared( @@ -413,7 +413,7 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right 
before Inner query", out.getInputHeader())); #endif } @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a705ae2e013..ee38b7242b1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -889,7 +889,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); if (result_chunk) { auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); @@ -901,7 +901,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + Chunk result_chunk = Squashing::squash(squashing.flush()); if (!result_chunk) { return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 63858ce601d..429fd8b67c5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2322,12 +2322,11 @@ String IMergeTreeDataPart::getUniqueId() const return getDataPartStorage().getUniqueId(); } -String IMergeTreeDataPart::getPartBlockIDHash() const +UInt128 IMergeTreeDataPart::getPartBlockIDHash() const { SipHash hash; checksums.computeTotalChecksumDataOnly(hash); - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + return hash.get128(); } String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const @@ -2336,7 +2335,10 @@ String 
IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name); if (token.empty()) - return info.partition_id + "_" + getPartBlockIDHash(); + { + const auto hash_value = getPartBlockIDHash(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + } SipHash hash; hash.update(token.data(), token.size()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 204dfdaad0a..dbb1df3cfe8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -210,7 +210,7 @@ public: /// Compute part block id for zero level part. Otherwise throws an exception. /// If token is not empty, block id is calculated based on it instead of block data - String getPartBlockIDHash() const; + UInt128 getPartBlockIDHash() const; String getZeroLevelPartBlockID(std::string_view token) const; void setName(const String & new_name); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7bc04c05a1c..4a1163d2317 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -126,7 +126,8 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -167,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(); @@ -206,7 +207,6 @@ void MergeTreeSink::finishDelayedChunk() if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5da36b6ee3b..3dbcb5e5bda 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1317,7 +1317,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block block_to_squash = projection.calculate(cur_block, ctx->context); projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); if (squashed_chunk) { auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); @@ -1341,7 +1341,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); if (squashed_chunk) { auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); diff --git 
a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 228b5c596ab..3677f5b02ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -374,7 +374,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } } @@ -423,7 +424,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(zookeeper); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ccb6259da00..e36247103c7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1558,7 +1558,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); }); #endif @@ -1604,7 +1604,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); }); #endif @@ -1630,7 +1630,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Before out", true, stream_header); + return std::make_shared("StorageWindowView: Before out", stream_header); }); #endif From f6a2c3156bd81fba8a48a04df5e5095fb8b5a384 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:24:29 +0200 Subject: [PATCH 241/439] rename transform --- .../Transforms/DeduplicationTokenTransforms.cpp | 4 ++-- .../Transforms/DeduplicationTokenTransforms.h | 4 ++-- src/Processors/Transforms/buildPushingToViewsChain.cpp | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index f50e69e730f..6786f76cbef 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -154,7 +154,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,7 +165,7 @@ String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) } -void DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h 
b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 79d168d1000..94287dc4487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -132,10 +132,10 @@ namespace DeduplicationToken }; - class DefineSourceWithChunkHashesTransform : public ISimpleTransform + class DefineSourceWithChunkHashTransform : public ISimpleTransform { public: - explicit DefineSourceWithChunkHashesTransform(const Block & header_) + explicit DefineSourceWithChunkHashTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8d38396ecd5..312b333ab33 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) From cfa86b54ea9977e5730d91e799856238b3f5432e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 16:33:06 +0200 Subject: [PATCH 242/439] Review fixes --- .../ClusterProxy/executeQuery.cpp | 63 ++++++++++--------- src/Interpreters/ClusterProxy/executeQuery.h | 5 +- src/Interpreters/Context.h | 3 + src/Interpreters/InterpreterSelectQuery.cpp | 4 ++ src/Planner/PlannerJoinTree.cpp | 4 ++ src/Storages/StorageDistributed.cpp | 10 ++- src/Storages/StorageMergeTree.cpp | 6 ++ src/Storages/StorageReplicatedMergeTree.cpp | 6 ++ .../test_parallel_replicas_custom_key/test.py | 6 -- .../test.py | 3 + 10 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 47fbf35233a..5d56ef09127 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -228,6 +228,35 @@ static ThrottlerPtr getThrottler(const ContextPtr & context) return throttler; } +AdditionalShardFilterGenerator 
+getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns) +{ + if (!context->canUseParallelReplicasCustomKeyForCluster(cluster)) + return {}; + + const auto & settings = context->getSettingsRef(); + auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context); + if (custom_key_ast == nullptr) + return {}; + + return [my_custom_key_ast = std::move(custom_key_ast), + column_description = columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, + query_context = context, + replica_count = cluster.getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + replica_count, + replica_num - 1, + my_custom_key_ast, + {custom_key_type, custom_key_range_lower, custom_key_range_upper}, + column_description, + query_context); + }; +} + void executeQuery( QueryPlan & query_plan, @@ -239,43 +268,17 @@ void executeQuery( LoggerPtr log, ContextPtr context, const SelectQueryInfo & query_info, - const ColumnsDescription & columns, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, + AdditionalShardFilterGenerator shard_filter_generator, bool is_remote_function) { const Settings & settings = context->getSettingsRef(); - if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); - ClusterProxy::AdditionalShardFilterGenerator shard_filter_generator; - if (context->canUseParallelReplicasCustomKeyForCluster(*query_info.getCluster())) - { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context)) - { - shard_filter_generator = - [my_custom_key_ast = std::move(custom_key_ast), - column_description = columns, - custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, - custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, - custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, - query_context = context, - replica_count = query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - replica_count, - replica_num - 1, - my_custom_key_ast, - {custom_key_type, custom_key_range_lower, custom_key_range_upper}, - column_description, - query_context); - }; - } - } - const ClusterPtr & not_optimized_cluster = query_info.cluster; std::vector plans; @@ -599,6 +602,8 @@ void executeQueryWithParallelReplicasCustomKey( ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage); + auto shard_filter_generator = getShardFilterGeneratorForCustomKey(*query_info.getCluster(), context, columns); + ClusterProxy::executeQuery( query_plan, header, @@ -609,11 +614,11 @@ void executeQueryWithParallelReplicasCustomKey( getLogger("executeQueryWithParallelReplicasCustomKey"), context, query_info, - columns, /*sharding_key_expr=*/nullptr, /*sharding_key_column_name=*/{}, /*distributed_settings=*/{}, - /*is_remote_function= */ false); + 
shard_filter_generator, + /*is_remote_function=*/false); } void executeQueryWithParallelReplicasCustomKey( diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index cf60fc3f168..c22fcd24f03 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -52,6 +52,9 @@ class SelectStreamFactory; ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns); + /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). @@ -65,10 +68,10 @@ void executeQuery( LoggerPtr log, ContextPtr context, const SelectQueryInfo & query_info, - const ColumnsDescription & columns, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, + AdditionalShardFilterGenerator shard_filter_generator, bool is_remote_function); void executeQueryWithParallelReplicas( diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 680c9ecaa1c..85b9f1d249e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -461,6 +461,9 @@ protected: /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; + /// this is a mode of parallel replicas where we set parallel_replicas_count and parallel_replicas_offset + /// and generate specific filters on the replicas (e.g. 
when using parallel replicas with sample key) + /// if we already use a different mode of parallel replicas we want to disable this mode bool offset_parallel_replicas_enabled = true; public: diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 571a3c67415..fae204912fc 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -592,6 +592,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( "or it's invalid (settings `parallel_replicas_custom_key`)"); } } + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas else if (auto * distributed = dynamic_cast(storage.get()); distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 19028839ea7..5c66f69638c 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -846,6 +846,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1dd42d79d88..2cf3ced2904 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -839,7 +839,9 @@ void StorageDistributed::read( SelectQueryInfo modified_query_info = query_info; - if (local_context->getSettingsRef().allow_experimental_analyzer) + const auto & settings = local_context->getSettingsRef(); + + if (settings.allow_experimental_analyzer) { StorageID remote_storage_id = StorageID::createEmpty(); if (!remote_table_function_ptr) @@ -864,7 +866,6 @@ void StorageDistributed::read( header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - const auto & settings = local_context->getSettingsRef(); if (!settings.allow_experimental_analyzer) { modified_query_info.query = ClusterProxy::rewriteSelectQuery( @@ -894,6 +895,9 @@ void StorageDistributed::read( storage_snapshot, processed_stage); + auto shard_filter_generator = ClusterProxy::getShardFilterGeneratorForCustomKey( + *modified_query_info.getCluster(), local_context, getInMemoryMetadataPtr()->columns); + ClusterProxy::executeQuery( query_plan, header, @@ -904,10 +908,10 @@ void StorageDistributed::read( log, local_context, modified_query_info, - getInMemoryMetadataPtr()->columns, sharding_key_expr, sharding_key_column_name, distributed_settings, + shard_filter_generator, /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no 
shards to query, and this is handled earlier. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 58f64d6158b..061f611927d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -242,6 +242,12 @@ void StorageMergeTree::read( local_context); return; } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); } const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index baa4b26102e..d1e2a9c55b7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5490,6 +5490,12 @@ void StorageReplicatedMergeTree::read( local_context); return; } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); } readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 9a2480a77c3..affa3f32cbe 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -23,12 +23,6 @@ def start_cluster(): cluster.shutdown() -def create_tables(cluster): - n1 = nodes[0] - n1.query("DROP TABLE IF EXISTS dist_table SYNC") - n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster} SYNC") - - def insert_data(table_name, row_num, all_nodes=False): query = ( f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})" diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 5c1c8ef71ab..f24a24f3238 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -53,11 +53,13 @@ def create_tables(cluster, table_name): @pytest.mark.parametrize("use_hedged_requests", [1, 0]) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) def test_parallel_replicas_custom_key_failover( start_cluster, use_hedged_requests, custom_key, filter_type, + prefer_localhost_replica, ): cluster_name = "test_single_shard_multiple_replicas" table = "test_table" @@ -78,6 +80,7 @@ def test_parallel_replicas_custom_key_failover( "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, "use_hedged_requests": use_hedged_requests, + "prefer_localhost_replica": prefer_localhost_replica, # avoid considering replica delay on connection choice # otherwise connection can be not distributed evenly among available nodes # and so custom key secondary queries (we check it bellow) From ad6ddf634db8318f48b8f5e95d17473a1e5ae3e3 Mon Sep 17 00:00:00 2001 From: jwoodhead Date: Tue, 2 Jul 2024 09:33:15 -0500 Subject: [PATCH 243/439] Include offset argument for lagInFrame and leadInFrame window functions. 
Fixes #65952 --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..530eaae7283 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. ## Examples From ce19dc5cd91a1424a172959d62d629646dfd7b38 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 2 Jul 2024 14:37:33 +0000 Subject: [PATCH 244/439] fix test --- .../0_stateless/03161_lightweight_delete_projection.reference | 2 +- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 15832d4cdfa..c5a6cbab0bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,2 @@ -8888 Alice 50 1231 John 33 +8888 Alice 50 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 70a069df1bc..b189388e356 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -26,6 +26,6 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users'); -SELECT * FROM users; +SELECT * FROM users ORDER BY uid; DROP TABLE users; From 8dfa8d6df48e82e321641a239ccd715c4d188c62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 16:42:42 +0200 Subject: [PATCH 245/439] Add more Azure profile events + AzureUploadPart to AzureStageBlock --- src/Common/ProfileEvents.cpp | 8 +++++-- src/Coordination/KeeperConstants.cpp | 10 +++++++-- .../IO/WriteBufferFromAzureBlobStorage.cpp | 21 +++++++++++++++++++ .../copyAzureBlobStorageFile.cpp | 17 ++++++++++----- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..eaff2cf8856 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -447,14 +447,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart 
calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 51bf037c1c9..76541db6112 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) \ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index a2d21cf49c2..d1324e22978 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,6 +14,15 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } namespace DB @@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize() /// then we use single part upload instead of multi part upload if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { + ProfileEvents::increment(ProfileEvents::AzureUpload); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); + auto part_data = std::move(detached_part_data.front()); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); @@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if 
(blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage: auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..43052f661b3 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,14 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -156,6 +160,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -259,9 +267,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -333,7 +341,6 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); From 4ac30aa7d578acf00928ac2301fa7b50da9a040f Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 17:18:14 +0200 Subject: [PATCH 246/439] Fxi style --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 43052f661b3..6386c7a3c76 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,12 +16,10 @@ namespace ProfileEvents { extern const Event 
AzureCopyObject; - extern const Event AzureUpload; extern const Event AzureStageBlock; extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUpload; extern const Event DiskAzureStageBlock; extern const Event DiskAzureCommitBlockList; } From b64c1dc711b3f52bfef9f05b13812acd6d683244 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:20:29 +0200 Subject: [PATCH 247/439] Update index.md --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 530eaae7283..01fae9d9040 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. +- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type. ## Examples From 2598daa65aab4d58fae1cc5c69ebe9257c189f6b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:29:48 +0200 Subject: [PATCH 248/439] small fix of docs --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 01fae9d9040..49076f3cbe1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. 
The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. -- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type. +- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used. ## Examples From b5af73a299986c457ba42f59c0d39a53ab4d9053 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 15:48:10 +0000 Subject: [PATCH 249/439] Better --- src/Client/ClientBase.cpp | 7 ++----- src/Client/ClientBase.h | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 56573c15f32..5d472ba99b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - { + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; - cancelled_printed = true; - } } @@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream() if (is_interactive) { - if (cancelled && !cancelled_printed) + if (cancelled && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; else if (!written_first_block) output_stream << "Ok." << std::endl; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 756400137ad..30dc4168996 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -338,8 +338,8 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; - bool cancelled_printed = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; /// Unpacked descriptors and streams for the ease of use. int in_fd = STDIN_FILENO; From 7a7633a2309a5f1286f6120513dd75a54aefa1b6 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 2 Jul 2024 15:17:56 +0000 Subject: [PATCH 250/439] Simplify logic in corner case to avoid comparing doubles The case for > bit_limit is already covered in previous branch, so we just need to cover the other case. This also fixes an overflow that was caused in previous check. e.g. 
b > B(word_size * n) if sizeof(B) is 1 byte but n is huge --- src/Functions/bitShiftLeft.cpp | 17 ++++++++--------- src/Functions/bitShiftRight.cpp | 16 ++++++++-------- ...ift_throws_error_for_out_of_bounds.reference | 2 ++ ...bit_shift_throws_error_for_out_of_bounds.sql | 2 ++ 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 645672c50e2..99fd11114aa 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -40,13 +40,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8 * sizeof(*pos); size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.push_back(0); @@ -109,12 +108,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 2e9182d3fe6..bdc193c4be6 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -56,12 +56,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { /// insert default value out_vec.push_back(0); @@ -96,12 +96,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); 
diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference index d86bac9de59..33b8cd6ee26 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -1 +1,3 @@ +-- bitShiftRight +-- bitShiftLeft OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql index a1a246593d8..aec01753673 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -1,3 +1,4 @@ +SELECT '-- bitShiftRight'; SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } @@ -5,6 +6,7 @@ SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT '-- bitShiftLeft'; SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } From 21f0eb2eecab17f6137639dc9162dbc4301d0d95 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 18:13:49 +0200 Subject: [PATCH 251/439] Do not use async initialization of readers in s3queue --- .../StorageObjectStorageSource.cpp | 94 ++++++++----- .../StorageObjectStorageSource.h | 30 +++- .../ObjectStorageQueueIFileMetadata.cpp | 10 ++ .../ObjectStorageQueueIFileMetadata.h | 2 +- .../ObjectStorageQueueSource.cpp | 133 ++++++------------ .../ObjectStorageQueueSource.h | 36 ++--- .../StorageObjectStorageQueue.cpp | 41 +----- .../integration/test_storage_s3_queue/test.py | 1 + 8 files changed, 158 insertions(+), 189 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..0f0aae7a6a5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -65,7 +65,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( CurrentMetrics::StorageObjectStorageThreadsActive, CurrentMetrics::StorageObjectStorageThreadsScheduled, 1/* max_threads */)) - , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) @@ -156,20 +155,20 @@ std::shared_ptr StorageObjectStorageSourc return iterator; } -void StorageObjectStorageSource::lazyInitialize(size_t processor) +void StorageObjectStorageSource::lazyInitialize() { if (initialized) return; - reader = createReader(processor); + reader = createReader(); if (reader) - reader_future = createReaderAsync(processor); + reader_future = createReaderAsync(); initialized = true; } Chunk StorageObjectStorageSource::generate() { - lazyInitialize(0); + lazyInitialize(); while (true) { @@ 
-236,27 +235,30 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader() { - const auto cache_key = getKeyForSchemaCache( - getUniqueStoragePathIdentifier(*configuration, object_info), - configuration->format, - format_settings, - getContext()); - - auto get_last_mod_time = [&]() -> std::optional - { - return object_info.metadata - ? std::optional(object_info.metadata->last_modified.epochTime()) - : std::nullopt; - }; - return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); + return createReader( + 0, file_iterator, configuration, object_storage, read_from_format_info, format_settings, + key_condition, getContext(), &schema_cache, log, max_block_size, max_parsing_threads, need_only_count); } -StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count) { ObjectInfoPtr object_info; - auto query_settings = configuration->getQuerySettings(getContext()); + auto query_settings = configuration->getQuerySettings(context_); do { @@ -277,9 +279,29 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::shared_ptr source; std::unique_ptr read_buf; + auto try_get_num_rows_from_cache = [&]() -> std::optional + { + if (!schema_cache) + return std::nullopt; + + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, *object_info), + configuration->format, + format_settings, + context_); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache->tryGetNumRows(cache_key, get_last_mod_time); + }; + std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(*object_info) + && context_->getSettingsRef().use_cache_for_count_from_files + ? try_get_num_rows_from_cache() : std::nullopt; if (num_rows_from_cache) @@ -304,14 +326,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); - read_buf = createReadBuffer(*object_info); + read_buf = createReadBuffer(*object_info, object_storage, context_, log); } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), + context_, max_block_size, format_settings, need_only_count ? 
1 : max_parsing_threads, @@ -320,20 +342,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade compression_method, need_only_count); - if (key_condition) - input_format->setKeyCondition(key_condition); + if (key_condition_) + input_format->setKeyCondition(key_condition_); if (need_only_count) input_format->needOnlyCount(); builder.init(Pipe(input_format)); - if (columns_desc.hasDefaults()) + if (read_from_format_info.columns_description.hasDefaults()) { builder.addSimpleTransform( [&](const Block & header) { - return std::make_shared(header, columns_desc, *input_format, getContext()); + return std::make_shared(header, read_from_format_info.columns_description, *input_format, context_); }); } @@ -356,21 +378,25 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); } -std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +std::future StorageObjectStorageSource::createReaderAsync() { - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +std::unique_ptr StorageObjectStorageSource::createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log) { const auto & object_size = object_info.metadata->size_bytes; - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context_->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettings().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..c2bfff4b997 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -75,7 +75,6 @@ protected: const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; - ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; @@ -116,13 +115,32 @@ protected: std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. 
- ReaderHolder createReader(size_t processor = 0); - std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + static ReaderHolder createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count); + + ReaderHolder createReader(); + + std::future createReaderAsync(); + static std::unique_ptr createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log); void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); - void lazyInitialize(size_t processor); + void lazyInitialize(); }; class StorageObjectStorageSource::IIterator diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 52ee0c9f8ed..6fac519849d 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -62,6 +62,11 @@ void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & e last_exception = exception; } +void ObjectStorageQueueIFileMetadata::FileStatus::updateState(State state_) +{ + state = state_; +} + std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); @@ -224,9 +229,14 @@ bool ObjectStorageQueueIFileMetadata::setProcessing() auto [success, file_state] = setProcessingImpl(); if (success) + { file_status->onProcessing(); + } else + { + LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); + } LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", path, file_state, success ? 
"" : "not ", diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index 652b4742389..920beaa6f21 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -23,7 +23,7 @@ public: void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); - void updateState(State state_) { state = state_; } + void updateState(State state_); std::string getException() const; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 955e49bc2bf..683a7038bb6 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -357,41 +357,38 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_) - : ISource(header_) + : ISource(read_from_format_info_.source_header) , WithContext(context_) , name(std::move(name_)) , processor_id(processor_id_) - , action(action_) + , file_iterator(file_iterator_) + , configuration(configuration_) + , object_storage(object_storage_) + , read_from_format_info(read_from_format_info_) + , format_settings(format_settings_) + , queue_settings(queue_settings_) , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) + , max_block_size(max_block_size_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) , system_queue_log(system_queue_log_) , storage_id(storage_id_) - , max_processed_files_before_commit(max_processed_files_before_commit_) - , max_processed_rows_before_commit(max_processed_rows_before_commit_) - , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) - , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) , commit_once_processed(commit_once_processed_) - , remove_file_func(remove_file_func_) , log(log_) { } @@ -401,21 +398,6 @@ String ObjectStorageQueueSource::getName() const return name; } -void ObjectStorageQueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - LOG_TEST(log, "Initializing a new reader"); - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - - initialized = true; -} - Chunk 
ObjectStorageQueueSource::generate() { Chunk chunk; @@ -440,14 +422,21 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - lazyInitialize(processor_id); - - while (true) + while (!shutdown_called) { if (!reader) { - LOG_TEST(log, "No reader"); - break; + const auto context = getContext(); + reader = StorageObjectStorageSource::createReader( + processor_id, file_iterator, configuration, object_storage, read_from_format_info, + format_settings, nullptr, context, nullptr, log, max_block_size, + context->getSettingsRef().max_parsing_threads.value, /* need_only_count */false); + + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } } const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); @@ -528,7 +517,7 @@ Chunk ObjectStorageQueueSource::generateImpl() total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, requested_virtual_columns, + chunk, read_from_format_info.requested_virtual_columns, { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes @@ -547,9 +536,6 @@ Chunk ObjectStorageQueueSource::generateImpl() if (processed_rows_from_file == 0) { - auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); - chassert(file_iterator); - if (file_status->retries < file_metadata->getMaxTries()) file_iterator->returnForRetry(reader.getObjectInfo()); @@ -564,11 +550,13 @@ Chunk ObjectStorageQueueSource::generateImpl() file_status->setProcessingEndTime(); file_status.reset(); + reader = {}; processed_rows_from_file = 0; processed_files.push_back(file_metadata); - if (processed_files.size() == max_processed_files_before_commit) + if (queue_settings.max_processed_files_before_commit + && processed_files.size() == queue_settings.max_processed_files_before_commit) { LOG_TRACE(log, "Number of max processed files before commit reached " "(rows: {}, bytes: {}, files: {})", @@ -576,68 +564,30 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - bool rows_or_bytes_or_time_limit_reached = false; - if (max_processed_rows_before_commit - && total_processed_rows == max_processed_rows_before_commit) + if (queue_settings.max_processed_rows_before_commit + && total_processed_rows == queue_settings.max_processed_rows_before_commit) { LOG_TRACE(log, "Number of max processed rows before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processed_bytes_before_commit - && total_processed_bytes == max_processed_bytes_before_commit) + else if (queue_settings.max_processed_bytes_before_commit + && total_processed_bytes == queue_settings.max_processed_bytes_before_commit) { LOG_TRACE(log, "Number of max processed bytes before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processing_time_sec_before_commit - && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + else if (queue_settings.max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= queue_settings.max_processing_time_sec_before_commit) { LOG_TRACE(log, "Max processing time before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; - } - - if 
(rows_or_bytes_or_time_limit_reached) - { - if (!reader_future.valid()) - break; - - LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " - "will process it despite the limit"); - } - - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - { - LOG_TEST(log, "Reader finished"); - break; - } - - file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - - if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } } return {}; @@ -681,12 +631,11 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { - switch (action) + switch (queue_settings.after_processing.value) { case ObjectStorageQueueAction::DELETE: { - assert(remove_file_func); - remove_file_func(path); + object_storage->removeObject(StoredObject(path)); break; } case ObjectStorageQueueAction::KEEP: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index ccd87e8a269..fce2a426ecb 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -97,22 +97,20 @@ public: ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -128,29 +126,27 @@ public: private: const String name; const size_t processor_id; - const ObjectStorageQueueAction action; + const std::shared_ptr file_iterator; + const ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const ReadFromFormatInfo read_from_format_info; + const std::optional format_settings; + const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; + const size_t max_block_size; + const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; const std::shared_ptr system_queue_log; const StorageID storage_id; - const size_t max_processed_files_before_commit; - const size_t 
max_processed_rows_before_commit; - const size_t max_processed_bytes_before_commit; - const size_t max_processing_time_sec_before_commit; const bool commit_once_processed; - RemoveFileFunc remove_file_func; LoggerPtr log; std::vector processed_files; std::vector failed_during_read_files; Source::ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; size_t processed_rows_from_file = 0; size_t total_processed_rows = 0; @@ -165,8 +161,6 @@ private: ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - - void lazyInitialize(size_t processor); }; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 95265cde9ea..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -352,43 +352,14 @@ std::shared_ptr StorageObjectStorageQueue::createSourc ContextPtr local_context, bool commit_once_processed) { - auto internal_source = std::make_unique( - getName(), - object_storage, - configuration, - info, - format_settings, - local_context, - max_block_size, - file_iterator, - local_context->getSettingsRef().max_download_threads, - false); - - auto file_deleter = [=, this](const std::string & path) mutable - { - object_storage->removeObject(StoredObject(path)); - }; - return std::make_shared( - getName(), - processor_id, - info.source_header, - std::move(internal_source), - files_metadata, - queue_settings->after_processing, - file_deleter, - info.requested_virtual_columns, - local_context, - shutdown_called, - table_is_being_dropped, + getName(), processor_id, + file_iterator, configuration, object_storage, + info, format_settings, + *queue_settings, files_metadata, + local_context, max_block_size, shutdown_called, table_is_being_dropped, getQueueLog(object_storage, local_context, *queue_settings), - getStorageID(), - log, - queue_settings->max_processed_files_before_commit, - queue_settings->max_processed_rows_before_commit, - queue_settings->max_processed_bytes_before_commit, - queue_settings->max_processing_time_sec_before_commit, - commit_once_processed); + getStorageID(), log, commit_once_processed); } bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b93e560d5b9..bf3c28c5429 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1780,6 +1780,7 @@ def test_commit_on_limit(started_cluster): if "test_999999.csv" in get_processed_files(): break time.sleep(1) + assert "test_999999.csv" in get_processed_files() assert 1 == int( From a020138764ff01f6a89c05824d9236bad13459ce Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Jul 2024 18:15:57 +0200 Subject: [PATCH 252/439] Tests: add tests for new analyzer --- .../03199_queries_with_new_analyzer.reference | 27 ++++++++++++ .../03199_queries_with_new_analyzer.sql | 41 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/queries/0_stateless/03199_queries_with_new_analyzer.reference create mode 100644 tests/queries/0_stateless/03199_queries_with_new_analyzer.sql diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference new file mode 100644 index 00000000000..10ce589000d 
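The s3queue change above removes the asynchronous reader prefetch (`createReaderAsync` / `reader_future`) from the queue source and creates readers synchronously inside the processing loop, so commit limits are checked before another file is taken from the queue. A rough control-flow sketch of the new shape — invented names and types, not the real `ObjectStorageQueueSource`:

```cpp
// Rough sketch of the post-patch control flow (invented stand-in types).
// Before the patch, the next file's reader was prefetched asynchronously, so a
// file could already be claimed when a commit limit was hit; now a reader is
// created only when the loop actually needs one.
#include <memory>
#include <vector>

struct Reader
{
    int rows_left = 3;
    bool pull() { return rows_left-- > 0; }  // stand-in for pulling one chunk
};

struct QueueSourceSketch
{
    size_t max_files_before_commit = 2;
    size_t files_available = 5;
    std::vector<int> processed_files;

    // After the patch: a reader is created synchronously, only when needed.
    std::unique_ptr<Reader> createReaderForNextFile()
    {
        if (files_available == 0)
            return nullptr;
        --files_available;
        return std::make_unique<Reader>();
    }

    void run()
    {
        std::unique_ptr<Reader> reader;
        while (true)
        {
            if (!reader)
            {
                reader = createReaderForNextFile();  // no future, no prefetch of the next file
                if (!reader)
                    break;
            }
            while (reader->pull())
                ;  // emit chunks from the current file
            processed_files.push_back(1);
            reader.reset();  // the next iteration decides whether to take another file
            // Commit limits are checked before another file is claimed from the queue.
            if (processed_files.size() == max_files_before_commit)
                break;
        }
    }
};

int main()
{
    QueueSourceSketch source;
    source.run();
    return static_cast<int>(source.processed_files.size());  // 2
}
```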
--- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference @@ -0,0 +1,27 @@ +5 (4230072075578472911,4230072075578472911) 71789584853496063 +2 (4401188181514187637,4401188181514187637) 878466845199253299 +4 (4940826638032106783,4940826638032106783) 3675164899122807807 +6 (10957420562507184961,10957420562507184961) 3732623117916254211 +0 (797076400500506358,797076400500506358) 3746094338409299772 +7 (10843611042193511775,10843611042193511775) 4607251742847087615 +3 (12588286986351526898,12588286986351526898) 13889114719560662796 +8 (452995860660674674,452995860660674674) 17365664920787500812 +9 (12206106972241516904,12206106972241516904) 17567684527097330880 +1 (14558425114501132193,14558425114501132193) 18445898820068822019 +3 255 255 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql new file mode 100644 index 00000000000..c32d7524492 --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer=1; + +SELECT *, ngramMinHash(*) AS minhash, mortonEncode(untuple(ngramMinHash(*))) AS z +FROM (SELECT toString(number) FROM numbers(10)) +ORDER BY z LIMIT 100; + +CREATE TABLE test ( + idx UInt64, + coverage Array(UInt64), + test_name String +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO test VALUES (10, [0,1,2,3], 'xx'), (20, [3,4,5,6], 'xxx'), (90, [3,4,5,6,9], 'xxxx'); + +WITH + 4096 AS w, 4096 AS h, w * h AS pixels, + arrayJoin(coverage) AS num, + num DIV (32768 * 32768 DIV pixels) AS idx, + mortonDecode(2, idx) AS coord, + 255 AS b, + least(255, uniq(test_name)) AS r, + 255 * uniq(test_name) / (max(uniq(test_name)) OVER ()) AS g +SELECT r::UInt8, g::UInt8, b::UInt8 +FROM test +GROUP BY coord +ORDER BY coord.2 * w + coord.1 +WITH FILL FROM 0 TO 10; + + +CREATE TABLE seq ( + number UInt64 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO seq VALUES (0), (6), (7); + +WITH (Select min(number), max(number) from seq) as range Select * from numbers(range.1, range.2); + From 4f66a6651afc9f655c820fedf6a6163fc4942fe9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Jul 2024 18:16:22 +0200 Subject: [PATCH 253/439] Tests: remove +x flag from 03167_base64_url_functions_sh.reference --- tests/queries/0_stateless/03167_base64_url_functions_sh.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/03167_base64_url_functions_sh.reference diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference old mode 100755 new mode 100644 From 389a86ec059461f20521e4dd2c7888e2b3b37623 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 2 Jul 2024 16:20:42 +0000 Subject: [PATCH 254/439] Sort error codes alphabetically --- src/Functions/bitShiftLeft.cpp | 2 +- src/Functions/bitShiftRight.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 99fd11114aa..8e39ed86461 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -5,9 +5,9 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; } namespace diff --git 
a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index bdc193c4be6..46cfcde8a33 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -6,9 +6,9 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; } namespace From 0ed34661243e918a81d8823dbec8917ef88ab3b2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 15:30:16 +0000 Subject: [PATCH 255/439] Cleanup FunctionArgumentDescriptor const char * can be nullptr, std::string_view can't. --- src/Functions/FunctionHelpers.cpp | 33 ++++--- src/Functions/FunctionHelpers.h | 85 ++++++++----------- src/Functions/FunctionStringReplace.h | 2 +- src/Functions/FunctionTokens.h | 4 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsConversion.cpp | 6 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/JSONArrayLength.cpp | 2 +- src/Functions/URL/URLHierarchy.cpp | 2 +- src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- src/Functions/URL/extractURLParameters.cpp | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 2 +- src/Functions/array/arrayRandomSample.cpp | 2 +- src/Functions/array/arrayShingles.cpp | 2 +- src/Functions/arrayStringConcat.cpp | 2 +- src/Functions/castOrDefault.cpp | 6 +- src/Functions/countMatches.h | 2 +- src/Functions/dateTimeToSnowflakeID.cpp | 4 +- src/Functions/extractAll.cpp | 2 +- src/Functions/extractAllGroups.h | 2 +- src/Functions/extractGroups.cpp | 2 +- src/Functions/formatQuery.cpp | 2 +- src/Functions/fromDaysSinceYearZero.cpp | 2 +- src/Functions/generateSnowflakeID.cpp | 2 +- src/Functions/generateUUIDv4.cpp | 4 +- src/Functions/generateUUIDv7.cpp | 2 +- src/Functions/makeDate.cpp | 14 +-- src/Functions/parseDateTime.cpp | 2 +- src/Functions/parseReadableSize.cpp | 2 +- src/Functions/regexpExtract.cpp | 2 +- src/Functions/repeat.cpp | 2 +- src/Functions/seriesDecomposeSTL.cpp | 2 +- src/Functions/seriesOutliersDetectTukey.cpp | 2 +- src/Functions/snowflake.cpp | 8 +- src/Functions/snowflakeIDToDateTime.cpp | 4 +- src/Functions/space.cpp | 2 +- src/Functions/timestamp.cpp | 2 +- src/Functions/toDecimalString.cpp | 2 +- 40 files changed, 108 insertions(+), 122 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 593646240ca..0027f9f281f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,7 +97,7 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description) + const char * type_name) { if (arguments.size() <= argument_index) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", @@ -106,7 +106,7 @@ void validateArgumentType(const IFunction & func, const DataTypes & arguments, const auto & argument = arguments[argument_index]; if (!validator_func(*argument)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description); + argument->getName(), argument_index, func.getName(), type_name); } namespace @@ -120,9 +120,7 @@ void 
validateArgumentsImpl(const IFunction & func, { const auto argument_index = i + argument_offset; if (argument_index >= arguments.size()) - { break; - } const auto & arg = arguments[i + argument_offset]; const auto & descriptor = descriptors[i]; @@ -130,10 +128,10 @@ void validateArgumentsImpl(const IFunction & func, throw Exception(error_code, "Illegal type of argument #{}{} of function {}{}{}", argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), + " '" + String(descriptor.name) + "'", func.getName(), - (descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), - (arg.type ? ", got " + arg.type->getName() : String{})); + String(", expected ") + String(descriptor.type_name), + arg.type ? ", got " + arg.type->getName() : String{}); } } @@ -141,19 +139,22 @@ void validateArgumentsImpl(const IFunction & func, int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const { - if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) + if (name.empty() || type_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set"); + + if (type_validator && (data_type == nullptr || !type_validator(*data_type))) return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; - if (column_validator_func && (column == nullptr || !column_validator_func(*column))) + if (column_validator && (column == nullptr || !column_validator(*column))) return ErrorCodes::ILLEGAL_COLUMN; return 0; } -void validateFunctionArgumentTypes(const IFunction & func, - const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args) +void validateFunctionArguments(const IFunction & func, + const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args) { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { @@ -165,10 +166,8 @@ void validateFunctionArgumentTypes(const IFunction & func, using A = std::decay_t; if constexpr (std::is_same_v) { - if (a.argument_name) - result += "'" + std::string(a.argument_name) + "' : "; - if (a.expected_type_description) - result += a.expected_type_description; + result += "'" + String(a.name) + "' : "; + result += a.type_name; } else if constexpr (std::is_same_v) result += a.type->getName(); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 6267d8eacc4..c08eb5265c1 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -119,73 +119,60 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName /// throws if there is no argument at specified index or if predicate returns false. void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description); + const char * type_name); -/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected - * - * Also it is used to generate function description when arguments do not match expected ones. - * Any field can be null: - * `argument_name` - if not null, reported via type check errors. 
- * `expected_type_description` - if not null, reported via type check errors. - * `type_validator_func` - if not null, used to validate data type of function argument. - * `column_validator_func` - if not null, used to validate column of function argument. - */ +/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided +/// arguments match the expected arguments. struct FunctionArgumentDescriptor { - const char * argument_name; + /// The argument name, e.g. "longitude". + /// Should not be empty. + std::string_view name; + /// A function which validates the argument data type. + /// May be nullptr. using TypeValidator = bool (*)(const IDataType &); - TypeValidator type_validator_func; + TypeValidator type_validator; + + /// A function which validates the argument column. + /// May be nullptr. using ColumnValidator = bool (*)(const IColumn &); - ColumnValidator column_validator_func; + ColumnValidator column_validator; - const char * expected_type_description; + /// The expected argument type, e.g. "const String" or "UInt64". + /// Should not be empty. + std::string_view type_name; - /** Validate argument type and column. - * - * Returns non-zero error code if: - * Validator != nullptr && (Value == nullptr || Validator(*Value) == false) - * For: - * Validator is either `type_validator_func` or `column_validator_func` - * Value is either `data_type` or `column` respectively. - * ILLEGAL_TYPE_OF_ARGUMENT if type validation fails - * - */ + /// Validate argument type and column. int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; }; using FunctionArgumentDescriptors = std::vector; -/** Validate that function arguments match specification. - * - * Designed to simplify argument validation for functions with variable arguments - * (e.g. depending on result type or other trait). - * First, checks that number of arguments is as expected (including optional arguments). - * Second, checks that mandatory args present and have valid type. - * Third, checks optional arguments types, skipping ones that are missing. - * - * Please note that if you have several optional arguments, like f([a, b, c]), - * only these calls are considered valid: - * f(a) - * f(a, b) - * f(a, b, c) - * - * But NOT these: f(a, c), f(b, c) - * In other words you can't omit middle optional arguments (just like in regular C++). - * - * If any mandatory arg is missing, throw an exception, with explicit description of expected arguments. - */ -void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args = {}); +/// Validates that the user-provided arguments match the expected arguments. +/// +/// Checks that +/// - the number of provided arguments matches the number of mandatory/optional arguments, +/// - all mandatory arguments are present and have the right type, +/// - optional arguments - if present - have the right type. +/// +/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. 
these calls are considered valid: +/// f(a) +/// f(a, b) +/// f(a, b, c) +/// but these are NOT: +/// f(a, c) +/// f(b, c) +void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args = {}); /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. std::pair, const ColumnArray::Offset *> checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); -/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. - * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. - */ +/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns. +/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); struct NullPresence diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index aee04a5969a..b4bcfa514a8 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -40,7 +40,7 @@ public: {"replacement", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index d6cf6a24983..0ca47126198 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) @@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } } diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index c418163343b..e282bcfbfe2 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -47,7 +47,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 524b4f82acd..7af6265eba9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -165,7 +165,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"input", static_cast(&isStringOrFixedString), 
{}, "plaintext"}, @@ -438,7 +438,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 2a0b2f1d075..f3e54d2fbd9 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -2020,7 +2020,7 @@ public: DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if constexpr (to_decimal) @@ -2049,7 +2049,7 @@ public: optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if constexpr (std::is_same_v) { @@ -2390,7 +2390,7 @@ public: if (isDateTime64(arguments)) { - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..7a907e56a7d 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -647,7 +647,7 @@ public: FunctionArgumentDescriptors optional_args{ {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return arguments[0].type; } diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 84e87061398..73dd55f1266 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -48,7 +48,7 @@ namespace {"json", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index c08f41f06ee..0f565df8172 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 7c796116b8d..2cb5995e375 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameterNames.cpp 
b/src/Functions/URL/extractURLParameterNames.cpp index 16ace36d39b..b3d51d02162 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 43079834872..ce2aadaeede 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 87f3390ac73..7db20667888 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -87,7 +87,7 @@ public: {"array_1", static_cast(&isArray), nullptr, "Array"}, {"array_2", static_cast(&isArray), nullptr, "Array"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared>(); } diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index b08a73b93f3..6e176b6e33d 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -39,7 +39,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); // Return an array with the same nested type as the input array const DataTypePtr & array_type = arguments[0].type; diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index 8932482c69c..7c97d8136fb 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -31,7 +31,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"length", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].type.get()); return std::make_shared(std::make_shared(array_type->getNestedType())); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 421408c01f2..12bab410fec 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -159,7 +159,7 @@ public: {"separator", static_cast(&isString), isColumnConst, "const String"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 44b39811882..995b5fa91e7 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -203,7 +203,7 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const 
override { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if (isDecimal(type) || isDateTime64(type)) @@ -212,9 +212,9 @@ private: if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); - optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); + optional_args.push_back({"default_value", nullptr, nullptr, "any type"}); - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); size_t additional_argument_index = 1; diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index fbbb9d017ee..5f07b936e26 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -38,7 +38,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index 968a7628ca5..c48f8c13152 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -43,7 +43,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } @@ -91,7 +91,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index 5801a7b8f4f..4a3eb32474c 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -59,7 +59,7 @@ public: {"pattern", static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index dfcd0e31715..7732855b211 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -74,7 +74,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); /// Two-dimensional array of strings, each `row` of top array represents matching groups. 
return std::make_shared(std::make_shared(std::make_shared())); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index f62352af0bd..ac6266a2e82 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -48,7 +48,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 3b632147864..655ea2e7cde 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: FunctionArgumentDescriptors args{ {"query", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr string_type = std::make_shared(); if (error_handling == ErrorHandling::Null) diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index b98c587d172..0543e6bf229 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -54,7 +54,7 @@ public: { FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 8ac010deafc..a171b6bf86e 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,7 +167,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index b0fec43fe94..a928f9009c8 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -30,9 +30,9 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "any type"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..a9ed08d9f83 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -163,7 +163,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 3d8b8617472..41a09793994 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -87,7 +87,7 @@ public: {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, 
{mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } else { @@ -95,7 +95,7 @@ public: {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } return std::make_shared(); @@ -193,7 +193,7 @@ public: {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -357,7 +357,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -440,7 +440,7 @@ public: {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if (arguments.size() >= mandatory_argument_names.size() + 1) { @@ -572,7 +572,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -652,7 +652,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional precision argument auto precision = DEFAULT_PRECISION; diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 162b8c58873..339eb4cb26c 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -589,7 +589,7 @@ namespace {"timezone", static_cast(&isString), &isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp index f5c2c53439b..1abcf7f164f 100644 --- a/src/Functions/parseReadableSize.cpp +++ b/src/Functions/parseReadableSize.cpp @@ -68,7 +68,7 @@ public: { {"readable_size", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr return_type = std::make_shared(); if constexpr (error_handling == ErrorHandling::Null) return std::make_shared(return_type); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index cfb42580cb0..3cc5393296c 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -54,7 +54,7 @@ public: if (arguments.size() == 3) 
args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 7f2fe646062..aa90bf2490d 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -201,7 +201,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 618808b64ed..720aa1e0799 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -45,7 +45,7 @@ public: {"time_series", static_cast(&isArray), nullptr, "Array"}, {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared(std::make_shared())); } diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 81fc904e16e..4063d0ab85b 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -51,7 +51,7 @@ public: {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 5ff8a636058..31ea6a28ece 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -64,7 +64,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -121,7 +121,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) @@ -190,7 +190,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -255,7 +255,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index b799792a56f..9a1d5b8a74b 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -56,7 +56,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - 
validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) @@ -127,7 +127,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 83183c991bc..ce12f2f541c 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -48,7 +48,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index fbca08b0968..6f2bd2030d5 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -46,7 +46,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(DATETIME_SCALE); } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index fc621b272de..4ee664ad237 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -43,7 +43,7 @@ public: {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + validateFunctionArguments(*this, arguments, mandatory_args, {}); return std::make_shared(); } From 1821638d5e3e2ee8fbea278c67e6d757f03c4253 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 16:38:00 +0000 Subject: [PATCH 256/439] Replace validateArgumentType() by validateFunctionArguments() --- src/Functions/FunctionHelpers.cpp | 14 -------------- src/Functions/FunctionHelpers.h | 6 ------ src/Functions/geohashDecode.cpp | 7 +++++-- src/Functions/geohashEncode.cpp | 22 +++++++++------------- src/Functions/geohashesInBox.cpp | 25 ++++++++++++++----------- 5 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 0027f9f281f..236afc5ecbf 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -95,20 +95,6 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName return res; } -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name) -{ - if (arguments.size() <= argument_index) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", - func.getName()); - - const auto & argument = arguments[argument_index]; - if (!validator_func(*argument)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), argument_index, func.getName(), type_name); -} - namespace { void validateArgumentsImpl(const IFunction & func, diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index c08eb5265c1..4f93b236bcb 100644 --- 
a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -115,12 +115,6 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// column if it is nullable. ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); -/// Checks argument type at specified index with predicate. -/// throws if there is no argument at specified index or if predicate returns false. -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name); - /// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided /// arguments match the expected arguments. struct FunctionArgumentDescriptor diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b2454f5dffc..96ad7dacfc4 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -38,9 +38,12 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + FunctionArgumentDescriptors args{ + {"encoded", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArguments(*this, arguments, args); return std::make_shared( DataTypes{std::make_shared(), std::make_shared()}, diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 7c353b822aa..034c8188b63 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -40,19 +39,16 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", - getName()); - } + FunctionArgumentDescriptors mandatory_args{ + {"longitude", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude", static_cast(&isFloat), nullptr, "Float*"} + }; + FunctionArgumentDescriptors optional_args{ + {"precision", static_cast(&isInteger), nullptr, "(U)Int*"} + }; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index ac8d4a6ad8f..9429903dda7 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -35,22 +35,25 @@ public: size_t getNumberOfArguments() const 
override { return 5; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - validateArgumentType(*this, arguments, 2, isFloat, "float"); - validateArgumentType(*this, arguments, 3, isFloat, "float"); - validateArgumentType(*this, arguments, 4, isUInt8, "integer"); + FunctionArgumentDescriptors args{ + {"longitute_min", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"longitute_max", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"precision", static_cast(&isUInt8), nullptr, "UInt8"} + }; + validateFunctionArguments(*this, arguments, args); - if (!(arguments[0]->equals(*arguments[1]) && - arguments[0]->equals(*arguments[2]) && - arguments[0]->equals(*arguments[3]))) + if (!(arguments[0].type->equals(*arguments[1].type) && + arguments[0].type->equals(*arguments[2].type) && + arguments[0].type->equals(*arguments[3].type))) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of {} all coordinate arguments must have the same type, " - "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), - arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); + "instead they are:{}, {}, {}, {}.", getName(), arguments[0].type->getName(), + arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName()); } return std::make_shared(std::make_shared()); From 659020dc8695e974241723f1fa49fc66bcc1c478 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 17:42:33 +0000 Subject: [PATCH 257/439] More aesthetic error messages --- src/Functions/FunctionHelpers.cpp | 60 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 236afc5ecbf..b30f38d3d76 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,6 +97,19 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName namespace { + +String withOrdinalEnding(size_t i) +{ + switch (i) + { + case 0: return "1st"; + case 1: return "2nd"; + case 2: return "3rd"; + default: return std::to_string(i) + "th"; + } + +} + void validateArgumentsImpl(const IFunction & func, const ColumnsWithTypeAndName & arguments, size_t argument_offset, @@ -112,12 +125,12 @@ void validateArgumentsImpl(const IFunction & func, const auto & descriptor = descriptors[i]; if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) throw Exception(error_code, - "Illegal type of argument #{}{} of function {}{}{}", - argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - " '" + String(descriptor.name) + "'", + "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}", + withOrdinalEnding(argument_offset + i), + descriptor.name, func.getName(), - String(", expected ") + String(descriptor.type_name), - arg.type ? ", got " + arg.type->getName() : String{}); + descriptor.type_name, + arg.type ? 
arg.type->getName() : ""); } } @@ -144,34 +157,19 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto join_argument_types = [](const auto & args, const String sep = ", ") - { - String result; - for (const auto & a : args) - { - using A = std::decay_t; - if constexpr (std::is_same_v) - { - result += "'" + String(a.name) + "' : "; - result += a.type_name; - } - else if constexpr (std::is_same_v) - result += a.type->getName(); - - result += sep; - } - - if (!args.empty()) - result.erase(result.end() - sep.length(), result.end()); - - return result; - }; + auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? "s" : ""); }; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", - func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), - mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), - join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); + "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", + func.getName(), + (!mandatory_args.empty() && !optional_args.empty()) + ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) + : (!mandatory_args.empty() && optional_args.empty()) + ? fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" + : (mandatory_args.empty() && !optional_args.empty()) + ? 
fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) + : "0 arguments", + fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } validateArgumentsImpl(func, arguments, 0, mandatory_args); From d2cade4aa38be9f33715d593cb2e0d549c9f565e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 2 Jul 2024 20:11:06 +0200 Subject: [PATCH 258/439] Relax the check in 02982_aggregation_states_destruction --- .../queries/0_stateless/02982_aggregation_states_destruction.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" From 073471530b1e6bc8f08b959b4071cd0a376f24e1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 2 Jul 2024 20:30:46 +0200 Subject: [PATCH 259/439] fix test --- tests/queries/0_stateless/01158_zookeeper_log_long.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 55d4162fc48..804cdf48fb6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -29,14 +29,20 @@ select 'parts'; select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0' + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; select 'blocks'; select type, has_watch, op_num, path, is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500)) +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500) + and (query_id='' or query_id in (select 
query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; drop table rmt sync; From 0afccecd6b4048414d672918f5351fe80abd4548 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:05:23 +0000 Subject: [PATCH 260/439] Fix build --- src/Functions/FunctionBase64Conversion.h | 2 +- src/Functions/seriesPeriodDetectFFT.cpp | 2 +- src/Functions/sqid.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 083179c3ca8..363b9ee3a31 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -202,7 +202,7 @@ public: {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); + validateFunctionArguments(*this, arguments, mandatory_arguments); return std::make_shared(); } diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index e85b3a97c67..471354235d5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -53,7 +53,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 6679646fef4..0e133590b84 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -100,7 +100,7 @@ public: FunctionArgumentDescriptors args{ {"sqid", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } From 54c4f02dca9fc0d33d717c7fb4122834ac214ae9 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 15:22:48 -0700 Subject: [PATCH 261/439] [Docs] Better wording for behavior of MATERIALIZED expr --- docs/en/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 0253bc647e6..b866d0b9f5f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -152,7 +152,7 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries. +Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s. Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. 
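The reworded `MATERIALIZED expr` documentation in the hunk above describes three behaviors: the column value is always computed from the expression, it cannot be supplied in an `INSERT`, and it is hidden from `SELECT *` unless `asterisk_include_materialized_columns` is enabled. A minimal sketch of that documented behavior follows; the table name, column names, and expression are hypothetical and not part of the patch.

```sql
-- Hypothetical illustration of the behavior described in the reworded docs.
CREATE TABLE mat_demo
(
    a UInt32,
    b UInt32 MATERIALIZED a * 2
)
ENGINE = MergeTree
ORDER BY a;

-- Only `a` can be specified explicitly; `b` is always computed as a * 2.
INSERT INTO mat_demo (a) VALUES (1), (2);

SELECT * FROM mat_demo;      -- returns only `a`
SELECT a, b FROM mat_demo;   -- returns `a` and the computed `b`

-- The setting mentioned in the docs makes materialized columns visible in SELECT * again.
SET asterisk_include_materialized_columns = 1;
SELECT * FROM mat_demo;      -- now returns `a` and `b`
```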
From 4680489c374248684feb0ea6ad78fcea4e81d7b0 Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Wed, 3 Jul 2024 00:31:50 +0200 Subject: [PATCH 262/439] Improve messaging around the JSON object datatype - Changed the title - Improve messaging around the future of this feature --- .../data-types/{json.md => object-data-type.md} | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) rename docs/en/sql-reference/data-types/{json.md => object-data-type.md} (79%) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/object-data-type.md similarity index 79% rename from docs/en/sql-reference/data-types/json.md rename to docs/en/sql-reference/data-types/object-data-type.md index 39e37abad82..0a3f780569f 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -1,24 +1,19 @@ --- -slug: /en/sql-reference/data-types/json +slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 -sidebar_label: JSON +sidebar_label: Object Data Type --- -# JSON +# Object Data Type :::note -This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. +This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) ::: Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::note -The JSON data type is an obsolete feature. Do not use it. -If you want to use it, set `allow_experimental_object_type = 1`. -::: - ## Example **Example 1** From e06d1d81603e10453ef42e38e14bb33bdd6939e5 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 17:49:42 -0700 Subject: [PATCH 263/439] Formatting --- docs/en/sql-reference/data-types/object-data-type.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/object-data-type.md index 0a3f780569f..cb9c0810c84 100644 --- a/docs/en/sql-reference/data-types/object-data-type.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -2,6 +2,7 @@ slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 sidebar_label: Object Data Type +keywords: [object, data type] --- # Object Data Type @@ -44,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json **Example 2** -To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: +To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: ```sql CREATE TABLE logs @@ -64,7 +65,7 @@ FROM file('access.json.gz', JSONAsString) ## Displaying JSON columns -When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). 
You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`: +When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`: ```sql SET output_format_json_named_tuples_as_objects = 1 @@ -78,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow ## Related Content +- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) From ca49cbafd96cfd0972a859c369f98265a84959f3 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 17:50:06 -0700 Subject: [PATCH 264/439] Fix link --- docs/en/sql-reference/data-types/object-data-type.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/object-data-type.md index cb9c0810c84..c29be2cff58 100644 --- a/docs/en/sql-reference/data-types/object-data-type.md +++ b/docs/en/sql-reference/data-types/object-data-type.md @@ -8,7 +8,7 @@ keywords: [object, data type] # Object Data Type :::note -This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) +This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) ::: Stores JavaScript Object Notation (JSON) documents in a single column. From 67aca82d9e3565463588b942cf73581303dd47f2 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 18:09:40 -0700 Subject: [PATCH 265/439] Revert file name change (changing slug is sufficient) --- docs/en/sql-reference/data-types/{object-data-type.md => json.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/en/sql-reference/data-types/{object-data-type.md => json.md} (100%) diff --git a/docs/en/sql-reference/data-types/object-data-type.md b/docs/en/sql-reference/data-types/json.md similarity index 100% rename from docs/en/sql-reference/data-types/object-data-type.md rename to docs/en/sql-reference/data-types/json.md From bcf8a93a52204cb80a867c237f302221bb51c272 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 01:34:25 -0400 Subject: [PATCH 266/439] `max_query_length` argument for the fuzzQuery --- .../table-functions/fuzzQuery.md | 3 ++- src/Storages/StorageFuzzQuery.cpp | 20 +++++++++++++------ src/Storages/StorageFuzzQuery.h | 1 + .../03031_table_function_fuzzquery.sql | 4 ++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md index ff8cfd1cd3b..e15f8a40156 100644 --- a/docs/en/sql-reference/table-functions/fuzzQuery.md +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -9,12 +9,13 @@ sidebar_label: fuzzQuery Perturbs the given query string with random variations. 
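Patches 262 to 265 above only adjust the wording, slug, and file name of the Object data type page; the behavior of the type itself is unchanged. For reference, the usage pattern that the page documents (mirroring its own Example 1 and the display setting it mentions, with a hypothetical table name and the experimental gating setting that the removed note referred to) looks roughly like this:

```sql
-- Illustrative only: the deprecated Object('json') type is gated behind an experimental setting.
SET allow_experimental_object_type = 1;

CREATE TABLE json_demo (o JSON) ENGINE = Memory;

INSERT INTO json_demo VALUES ('{"a": 1, "b": {"c": 2, "d": [1, 2, 3]}}');

-- Subcolumns are read directly; field names are printed when the
-- output_format_json_named_tuples_as_objects setting described on the page is enabled.
SET output_format_json_named_tuples_as_objects = 1;
SELECT o.a, o.b.c, o.b.d[3] FROM json_demo;
```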
``` sql -fuzzQuery(query[, random_seed]) +fuzzQuery(query[, max_query_length[, random_seed]]) ``` **Arguments** - `query` (String) - The source query to perform the fuzzing on. +- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. - `random_seed` (UInt64) - A random seed for producing stable results. **Returned Value** diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 5e29a04427b..229ae1af7c1 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -47,7 +47,7 @@ ColumnPtr FuzzQuerySource::createColumn() size_t data_len = data.size(); /// AST is too long, will start from the original query. - if (data_len > 500) + if (config.max_query_length > 500) { fuzz_base = query; continue; @@ -120,10 +120,11 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine // Supported signatures: // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); @@ -131,9 +132,16 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); configuration.query = std::move(first_arg); - if (engine_args.size() == 2) + if (engine_args.size() >= 2) { const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); if (!literal.value.isNull()) configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 3ae506fdfb8..125ef960e74 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -18,6 +18,7 @@ public: struct Configuration : public StatelessTableEngineConfiguration { String query; + UInt64 max_query_length = 500; UInt64 random_seed = randomSeed(); }; diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5821e2e5111..b26096f7f0e 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10 FORMAT NULL; +', 500, 8956) LIMIT 10 FORMAT NULL; From 6079373ce3ef1107bc7ea634c6d1e1ceac24744d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 08:36:16 +0000 Subject: [PATCH 267/439] Incorporate review feedback --- src/Functions/FunctionHelpers.cpp | 20 ++++++++++++-------- 1 file 
changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index b30f38d3d76..c658063b66f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -157,18 +157,22 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? "s" : ""); }; + auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; }; + + String expected_args_string; + if (!mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)); + else if (!mandatory_args.empty() && optional_args.empty()) + expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments" + else if (mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)); + else + expected_args_string = "0 arguments"; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", func.getName(), - (!mandatory_args.empty() && !optional_args.empty()) - ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) - : (!mandatory_args.empty() && optional_args.empty()) - ? fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" - : (mandatory_args.empty() && !optional_args.empty()) - ? 
fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) - : "0 arguments", + expected_args_string, fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } From 1f309ef342360ba2207a2ed1e7eb87c0eaa9cfde Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:03:32 +0200 Subject: [PATCH 268/439] Bump From f2ffd727f002702ab29dff7c2d18ceaaf8e09e6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:04:04 +0200 Subject: [PATCH 269/439] Bump From c86cdbb243c9093e3eb59134de08beb42f1d4c02 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 11:05:34 +0200 Subject: [PATCH 270/439] Remove scary jemalloc log --- programs/server/Server.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..f992fdc13a9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -133,10 +133,6 @@ # include #endif -#if USE_JEMALLOC -# include -#endif - #if USE_AZURE_BLOB_STORAGE # include # include @@ -176,34 +172,10 @@ namespace ProfileEvents namespace fs = std::filesystem; -#if USE_JEMALLOC -static bool jemallocOptionEnabled(const char *name) -{ - bool value; - size_t size = sizeof(value); - - if (mallctl(name, reinterpret_cast(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) - throw Poco::SystemException("mallctl() failed"); - - return value; -} -#else -static bool jemallocOptionEnabled(const char *) { return false; } -#endif - int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; - if (jemallocOptionEnabled("opt.background_thread")) - { - LOG_ERROR(&app.logger(), - "jemalloc.background_thread was requested, " - "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, " - "and also background_thread is not compatible with ClickHouse watchdog " - "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)"); - } - /// Do not fork separate process from watchdog if we attached to terminal. /// Otherwise it breaks gdb usage. /// Can be overridden by environment variable (cannot use server config at this moment). From d199a243fad2f353e7f9b32493b0d0dba9a80573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 11:34:35 +0200 Subject: [PATCH 271/439] Move experimental settings to the experimental block --- src/Core/Settings.h | 139 ++++++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 62 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d84e5b149f6..ee56c1133bf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -164,9 +164,6 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. 
It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ - M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ - \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \ @@ -202,21 +199,6 @@ class IColumn; M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ - M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ - M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ - M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ - M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ - M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ - \ - M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ - M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 
0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ - M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ - M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ - M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ - M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ - M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ \ @@ -248,8 +230,6 @@ class IColumn; M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \ M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \ - M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ - M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \ @@ -338,7 +318,6 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ - M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. 
If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ @@ -389,7 +368,6 @@ class IColumn; M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \ M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ - M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ @@ -399,8 +377,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ - M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ \ @@ -590,13 +567,6 @@ class IColumn; M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \ M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \ \ - M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. 
Not mature enough.", 0) \ - M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ - M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ - M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ - M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ @@ -635,8 +605,6 @@ class IColumn; M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ - M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -653,8 +621,6 @@ class IColumn; M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \ M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \ M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. 
Currently works only for FREEZE and ATTACH commands.", 0) \ - M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ - M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \ M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ @@ -716,9 +682,6 @@ class IColumn; M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \ M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \ M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \ - /** Experimental feature for moving data between shards. */ \ - \ - M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ \ M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \ M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ @@ -898,34 +861,11 @@ class IColumn; M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ - \ - /** Experimental functions */ \ - M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. 
Disabled by default, because this feature is experimental", 0) \ - M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ - M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ - M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ - M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ - M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ - M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ - M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ - M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ - M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ - M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ - M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ - M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ - M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ - M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ - M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ - M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. 
Made for SQL compatibility tests.", 0) \ M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ - M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\ + M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ @@ -935,6 +875,81 @@ class IColumn; M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ + M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ + M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + \ + /** Experimental features */ \ + M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ + M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ + M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ + M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ + M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ + M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. 
Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ + M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ + M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ + M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ + M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ + M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ + M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ + M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ + M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ + \ + M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ + M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ + \ + /* Parallel replicas */ \ + M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ + M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ + M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ + M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ + M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. 
A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ + M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ + M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ + M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ + M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ + M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ + M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ + M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ + \ + M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ + M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ + \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ + \ + /* Analyzer: It's not experimental anymore (WIP) */ \ + M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", 0) \ + M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ + M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ + M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ + M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ + \ + M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. 
Not mature enough.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ + \ + M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ + M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ + \ + M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ + M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ + \ + /** Experimental feature for moving data between shards. */ \ + M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ + + + + /** End of experimental features */ + // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. From cfafbc388cb1ac3ca7c5c6810f3f5c00f3b8b3d5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 3 Jul 2024 11:56:07 +0200 Subject: [PATCH 272/439] Fix test_drop_table --- .../ObjectStorageQueue/ObjectStorageQueueSource.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 683a7038bb6..dc5fb6d2744 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -422,8 +422,14 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - while (!shutdown_called) + while (true) { + if (shutdown_called) + { + LOG_TRACE(log, "Shutdown was called, stopping sync"); + break; + } + if (!reader) { const auto context = getContext(); From 601ee4ee3e50b2967063a124ac921753e3546d6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 12:08:02 +0200 Subject: [PATCH 273/439] Update Settings.h --- src/Core/Settings.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ee56c1133bf..b45c1e38d1c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -946,8 +946,6 @@ class IColumn; /** Experimental feature for moving data between shards. */ \ M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ - - /** End of experimental features */ From 198b80b6a252ef25f8b4f269a53c39ec5ae0e76f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 16:36:43 +0000 Subject: [PATCH 274/439] Cosmetics No code was harmed in the process (really just cosmetics). 
--- src/Storages/Statistics/Statistics.cpp | 40 +++++++++---------- src/Storages/Statistics/Statistics.h | 28 +++++-------- src/Storages/Statistics/TDigestStatistics.cpp | 34 ++++++++-------- src/Storages/Statistics/TDigestStatistics.h | 8 +--- src/Storages/Statistics/UniqStatistics.cpp | 2 +- src/Storages/Statistics/UniqStatistics.h | 3 +- 6 files changed, 48 insertions(+), 67 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fed0bd61c03..a4c57c9eef4 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include #include @@ -10,6 +7,8 @@ #include #include #include +#include + namespace DB { @@ -20,7 +19,6 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -/// Version / bitmask of statistics / data of statistics / enum StatisticsFileVersion : UInt16 { V0 = 0, @@ -29,17 +27,15 @@ enum StatisticsFileVersion : UInt16 IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_), rows(0) + : stats_desc(stats_desc_) { } void ColumnStatistics::update(const ColumnPtr & column) { rows += column->size(); - for (const auto & iter : stats) - { - iter.second->update(column); - } + for (const auto & stat : stats) + stat.second->update(column); } Float64 ColumnStatistics::estimateLess(Float64 val) const @@ -76,14 +72,17 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; for (const auto & [type, _]: stats) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// We write some basic statistics + + /// store the column row count as it is always useful writeIntBinary(rows, buf); - /// We write complex statistics - for (const auto & [type, stat_ptr]: stats) + + /// write the actual statistics object + for (const auto & [type, stat_ptr] : stats) stat_ptr->serialize(buf); } @@ -96,7 +95,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf) UInt64 stat_types_mask = 0; readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) { if (!(stat_types_mask & 1 << UInt8(it->first))) @@ -136,15 +137,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va { if (!validators.emplace(stats_type, std::move(validator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); - } MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(StatisticsType::TDigest, TDigestCreator); - registerCreator(StatisticsType::Uniq, UniqCreator); registerValidator(StatisticsType::TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::Uniq, UniqValidator); + registerCreator(StatisticsType::Uniq, UniqCreator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -159,9 +160,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st { auto it = validators.find(type); if (it == validators.end()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type); it->second(desc, data_type); } } @@ 
-173,10 +172,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); - } + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 2ab1337af02..5e756e48d42 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,19 +1,15 @@ #pragma once -#include -#include - #include -#include #include #include #include +#include namespace DB { -/// this is for user-defined statistic. constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; @@ -25,11 +21,9 @@ class IStatistics { public: explicit IStatistics(const SingleStatisticsDescription & stat_); - virtual ~IStatistics() = default; virtual void serialize(WriteBuffer & buf) = 0; - virtual void deserialize(ReadBuffer & buf) = 0; virtual void update(const ColumnPtr & column) = 0; @@ -43,11 +37,12 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); - String getFileName() const; + String getFileName() const; const String & columnName() const; UInt64 rowCount() const; @@ -55,17 +50,14 @@ public: void update(const ColumnPtr & column); Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; private: - friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; std::map stats; - UInt64 rows; /// the number of rows of the column + UInt64 rows = 0; /// the number of rows in the column }; class ColumnsDescription; @@ -79,25 +71,23 @@ public: void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + using Validator = std::function; using Creator = std::function; - using Validator = std::function; - ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticsType type, Creator creator); void registerValidator(StatisticsType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; using Validators = std::unordered_map; - Creators creators; + using Creators = std::unordered_map; Validators validators; + Creators creators; }; } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index aa5662c979d..2f254b604e4 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -8,53 +8,53 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): - IStatistics(stat_) +TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) + : IStatistics(stat_) { } Float64 TDigestStatistics::estimateLess(Float64 val) const { - return 
data.getCountLessThan(val); + return t_digest.getCountLessThan(val); } Float64 TDigestStatistics::estimateEqual(Float64 val) const { - return data.getCountEqual(val); + return t_digest.getCountEqual(val); } void TDigestStatistics::serialize(WriteBuffer & buf) { - data.serialize(buf); + t_digest.serialize(buf); } void TDigestStatistics::deserialize(ReadBuffer & buf) { - data.deserialize(buf); + t_digest.deserialize(buf); } void TDigestStatistics::update(const ColumnPtr & column) { - size_t size = column->size(); + size_t rows = column->size(); - for (size_t i = 0; i < size; ++i) + for (size_t row = 0; row < rows; ++row) { /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); } } +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); +} + StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) -{ - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); -} - } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 7c361b8751f..2e29becc5ee 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -6,27 +6,23 @@ namespace DB { - -/// TDigestStatistic is a kind of histogram. 
class TDigestStatistics : public IStatistics { public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; private: - QuantileTDigest data; + QuantileTDigest t_digest; }; -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index fc748e769ca..2f7a75db504 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -55,7 +55,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index 0d86a6e458a..bf097620a86 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -17,7 +17,6 @@ public: UInt64 getCardinality(); void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; @@ -30,7 +29,7 @@ private: }; -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } From 337871e0ec0f8d1c6c89d5b0d39977ea689adc22 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 18:02:58 +0000 Subject: [PATCH 275/439] Move some methods around Makes the order of methods within classes consistent. Did not touch the code itself. 
--- src/Storages/Statistics/Statistics.cpp | 5 +++- src/Storages/Statistics/Statistics.h | 4 +-- src/Storages/Statistics/TDigestStatistics.cpp | 28 +++++++++---------- src/Storages/Statistics/TDigestStatistics.h | 7 +++-- src/Storages/Statistics/UniqStatistics.cpp | 20 ++++++------- src/Storages/Statistics/UniqStatistics.h | 6 ++-- 6 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index a4c57c9eef4..5666f0bbf18 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -24,7 +24,10 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; -IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) + : stat(stat_) +{ +} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) : stats_desc(stats_desc_) diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 5e756e48d42..4af7c423257 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -23,11 +23,11 @@ public: explicit IStatistics(const SingleStatisticsDescription & stat_); virtual ~IStatistics() = default; + virtual void update(const ColumnPtr & column) = 0; + virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const ColumnPtr & column) = 0; - protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index 2f254b604e4..0e2cc8bac6d 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -13,14 +13,16 @@ TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) { } -Float64 TDigestStatistics::estimateLess(Float64 val) const +void TDigestStatistics::update(const ColumnPtr & column) { - return t_digest.getCountLessThan(val); -} + size_t rows = column->size(); -Float64 TDigestStatistics::estimateEqual(Float64 val) const -{ - return t_digest.getCountEqual(val); + for (size_t row = 0; row < rows; ++row) + { + /// TODO: support more types. + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); + } } void TDigestStatistics::serialize(WriteBuffer & buf) @@ -33,16 +35,14 @@ void TDigestStatistics::deserialize(ReadBuffer & buf) t_digest.deserialize(buf); } -void TDigestStatistics::update(const ColumnPtr & column) +Float64 TDigestStatistics::estimateLess(Float64 val) const { - size_t rows = column->size(); + return t_digest.getCountLessThan(val); +} - for (size_t row = 0; row < rows; ++row) - { - /// TODO: support more types. 
- Float64 value = column->getFloat64(row); - t_digest.add(value, 1); - } +Float64 TDigestStatistics::estimateEqual(Float64 val) const +{ + return t_digest.getCountEqual(val); } void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 2e29becc5ee..a9fbc0410f3 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -11,13 +11,14 @@ class TDigestStatistics : public IStatistics public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + Float64 estimateLess(Float64 val) const; + Float64 estimateEqual(Float64 val) const; + private: QuantileTDigest t_digest; }; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index 2f7a75db504..267654656cd 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -25,11 +25,13 @@ UniqStatistics::~UniqStatistics() collector->destroy(data); } -UInt64 UniqStatistics::getCardinality() +void UniqStatistics::update(const ColumnPtr & column) { - auto column = DataTypeUInt64().createColumn(); - collector->insertResultInto(data, *column, nullptr); - return column->getUInt(0); + /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. + /// Here we intend to avoid crash in CI. + auto col_ptr = column->convertToFullColumnIfLowCardinality(); + const IColumn * raw_ptr = col_ptr.get(); + collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } void UniqStatistics::serialize(WriteBuffer & buf) @@ -42,13 +44,11 @@ void UniqStatistics::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -void UniqStatistics::update(const ColumnPtr & column) +UInt64 UniqStatistics::getCardinality() { - /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. - /// Here we intend to avoid crash in CI. 
- auto col_ptr = column->convertToFullColumnIfLowCardinality(); - const IColumn * raw_ptr = col_ptr.get(); - collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); } void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index bf097620a86..4f28f80f9cb 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -11,18 +11,16 @@ class UniqStatistics : public IStatistics { public: UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; - UInt64 getCardinality(); + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + UInt64 getCardinality(); private: - std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; From 9f4e44bfc44a00dde015410d0c62e71b7cc000d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:02:29 +0000 Subject: [PATCH 276/439] Rename XStatistics to StatisticsX Makes the naming more consistent with the rest of the codebase, e.g. - MergeTreeIndexSet - MergeTreeIndexMinMax or - StorageJoin - StorageMergeTree etc. --- src/Storages/Statistics/Statistics.cpp | 12 ++++++------ ...igestStatistics.cpp => StatisticsTDigest.cpp} | 16 ++++++++-------- .../{TDigestStatistics.h => StatisticsTDigest.h} | 4 ++-- .../{UniqStatistics.cpp => StatisticsUniq.cpp} | 16 ++++++++-------- .../{UniqStatistics.h => StatisticsUniq.h} | 6 +++--- src/Storages/Statistics/tests/gtest_stats.cpp | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) rename src/Storages/Statistics/{TDigestStatistics.cpp => StatisticsTDigest.cpp} (67%) rename src/Storages/Statistics/{TDigestStatistics.h => StatisticsTDigest.h} (84%) rename src/Storages/Statistics/{UniqStatistics.cpp => StatisticsUniq.cpp} (79%) rename src/Storages/Statistics/{UniqStatistics.h => StatisticsUniq.h} (82%) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 5666f0bbf18..c454adccc06 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -44,7 +44,7 @@ void ColumnStatistics::update(const ColumnPtr & column) Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,12 +57,12 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); + auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) /// for every bucket. 
- if (uniq_static->getCardinality() < 2048) + if (statistics_uniq->getCardinality() < 2048) { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); + auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); return tdigest_static->estimateEqual(val); } } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp similarity index 67% rename from src/Storages/Statistics/TDigestStatistics.cpp rename to src/Storages/Statistics/StatisticsTDigest.cpp index 0e2cc8bac6d..0747197370c 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,12 +8,12 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) : IStatistics(stat_) { } -void TDigestStatistics::update(const ColumnPtr & column) +void StatisticsTDigest::update(const ColumnPtr & column) { size_t rows = column->size(); @@ -25,22 +25,22 @@ void TDigestStatistics::update(const ColumnPtr & column) } } -void TDigestStatistics::serialize(WriteBuffer & buf) +void StatisticsTDigest::serialize(WriteBuffer & buf) { t_digest.serialize(buf); } -void TDigestStatistics::deserialize(ReadBuffer & buf) +void StatisticsTDigest::deserialize(ReadBuffer & buf) { t_digest.deserialize(buf); } -Float64 TDigestStatistics::estimateLess(Float64 val) const +Float64 StatisticsTDigest::estimateLess(Float64 val) const { return t_digest.getCountLessThan(val); } -Float64 TDigestStatistics::estimateEqual(Float64 val) const +Float64 StatisticsTDigest::estimateEqual(Float64 val) const { return t_digest.getCountEqual(val); } @@ -54,7 +54,7 @@ void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { - return std::make_shared(stat); + return std::make_shared(stat); } } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/StatisticsTDigest.h similarity index 84% rename from src/Storages/Statistics/TDigestStatistics.h rename to src/Storages/Statistics/StatisticsTDigest.h index a9fbc0410f3..f391d0b17e6 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -6,10 +6,10 @@ namespace DB { -class TDigestStatistics : public IStatistics +class StatisticsTDigest : public IStatistics { public: - explicit TDigestStatistics(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/StatisticsUniq.cpp similarity index 79% rename from src/Storages/Statistics/UniqStatistics.cpp rename to src/Storages/Statistics/StatisticsUniq.cpp index 267654656cd..4e24e5f0e96 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) : IStatistics(stat_) { arena = 
std::make_unique(); @@ -20,12 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const collector->create(data); } -UniqStatistics::~UniqStatistics() +StatisticsUniq::~StatisticsUniq() { collector->destroy(data); } -void UniqStatistics::update(const ColumnPtr & column) +void StatisticsUniq::update(const ColumnPtr & column) { /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. /// Here we intend to avoid crash in CI. @@ -34,17 +34,17 @@ void UniqStatistics::update(const ColumnPtr & column) collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } -void UniqStatistics::serialize(WriteBuffer & buf) +void StatisticsUniq::serialize(WriteBuffer & buf) { collector->serialize(data, buf); } -void UniqStatistics::deserialize(ReadBuffer & buf) +void StatisticsUniq::deserialize(ReadBuffer & buf) { collector->deserialize(data, buf); } -UInt64 UniqStatistics::getCardinality() +UInt64 StatisticsUniq::getCardinality() { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); @@ -60,7 +60,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/StatisticsUniq.h similarity index 82% rename from src/Storages/Statistics/UniqStatistics.h rename to src/Storages/Statistics/StatisticsUniq.h index 4f28f80f9cb..1c521fa9984 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -7,11 +7,11 @@ namespace DB { -class UniqStatistics : public IStatistics +class StatisticsUniq : public IStatistics { public: - UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; + StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + ~StatisticsUniq() override; void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index f94f310be56..c3c14632ba1 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { From 2cefa56f9b640f14b020660ec0296fe7bb6669a9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:59:52 +0000 Subject: [PATCH 277/439] Update docs --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 6 +++--- docs/en/sql-reference/statements/alter/statistics.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f0c4e1b0e34..3826e4e9c94 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `TDigest` - Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. + [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. 
the 90th percentile) for numeric columns. - `Uniq` - - Estimate the number of distinct values of a column by HyperLogLog. + + [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. ## Column-level Settings {#column-level-settings} diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 80024781f88..6880cef0e5c 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns: ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` -:::note +:::note Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: From bd7e613b3b21589710e11b043577959ce340e2c5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:27:40 +0000 Subject: [PATCH 278/439] Minor cleanup of 02864_statistics_exception --- .../02864_statistics_exception.sql | 78 +++++++++---------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql index c531d39cd69..289ffee6600 100644 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -1,57 +1,55 @@ -DROP TABLE IF EXISTS t1; +-- Tests creating/dropping/materializing statistics produces the right exceptions. -CREATE TABLE t1 +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } SET allow_experimental_statistics = 1; -CREATE TABLE t1 +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64, - pk String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- Unknown statistics types are rejected +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest, tdigest(10)), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab ( - a Float64 STATISTICS(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE t1 +CREATE TABLE tab ( a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; + b String +) Engine = MergeTree() ORDER BY tuple(); -ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE t1 
ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- Statistics can be created only on integer columns -ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS a; -ALTER TABLE t1 DROP STATISTICS IF EXISTS a; -ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; -ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } -DROP TABLE t1; +DROP TABLE tab; From c390ecdb4df7e090ea94f10adda47dd73864c71c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:11:04 +0000 Subject: [PATCH 279/439] Rename 02864_statistics_operate --> 02864_statistics_ddl --- .../02864_statistics_ddl.reference | 31 ++++++++++ .../0_stateless/02864_statistics_ddl.sql | 59 +++++++++++++++++++ .../02864_statistics_operate.reference | 31 ---------- .../0_stateless/02864_statistics_operate.sql | 57 ------------------ 4 files changed, 90 insertions(+), 88 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.reference create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.sql diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference new file mode 100644 index 00000000000..a7ff5caa0b0 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -0,0 +1,31 @@ +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY 
pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql new file mode 100644 index 00000000000..fe612efe2ac --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -0,0 +1,59 @@ +-- Tests that various DDL statements create/drop/materialize statistics + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git 
a/tests/queries/0_stateless/02864_statistics_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference deleted file mode 100644 index 6398a9bd000..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.reference +++ /dev/null @@ -1,31 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 diff --git a/tests/queries/0_stateless/02864_statistics_operate.sql b/tests/queries/0_stateless/02864_statistics_operate.sql deleted file mode 100644 index bf69c11bc91..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.sql +++ /dev/null @@ -1,57 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; - -ALTER TABLE t1 DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 MATERIALIZE STATISTICS a, b; -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; 
-SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -ALTER TABLE t1 RENAME COLUMN b TO c; -SHOW CREATE TABLE t1; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS t1; From 4f0916caa5ee07c3c612641782288ee42adfd92a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 09:56:38 +0000 Subject: [PATCH 280/439] Rename test 03164_materialize_statistics --> 02864_statistics_materialize_in_merge Consistency with existing statistics tests - 02864_statistics_operate - 02864_statistics_exception - 02864_statistics_uniq --- ...statistics_materialize_in_merge.reference} | 0 .../02864_statistics_materialize_in_merge.sql | 52 +++++++++++++++++++ .../03164_materialize_statistics.sql | 49 ----------------- 3 files changed, 52 insertions(+), 49 deletions(-) rename tests/queries/0_stateless/{03164_materialize_statistics.reference => 02864_statistics_materialize_in_merge.reference} (100%) create mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql delete mode 100644 tests/queries/0_stateless/03164_materialize_statistics.sql diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference similarity index 100% rename from tests/queries/0_stateless/03164_materialize_statistics.reference rename to tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..3e15ec1148e --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
+ +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql deleted file mode 100644 index 43c5724dd59..00000000000 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ /dev/null @@ -1,49 +0,0 @@ -DROP TABLE IF EXISTS t_statistics_materialize; - -SET allow_experimental_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET materialize_statistics_on_insert = 0; - -CREATE TABLE t_statistics_materialize -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
- -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE t_statistics_materialize FINAL; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE t_statistics_materialize; -SET mutations_sync = 2; - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE t_statistics_materialize; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' - AND type = 'QueryFinish'; From 5f53a73457f0851f78b82e2d5559f4b654eaf6e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 Jul 2024 12:05:49 +0200 Subject: [PATCH 281/439] Fix config merging for from_env with replace overrides Without this patch new test fails with: Exception: Failed to preprocess config '/etc/clickhouse-server/config.xml': Exception: Element has value and does not have 'replace' attribute, can't process from_env substitution. 
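The failing scenario, concretely: one config.d override pins a literal value for a setting, and a later override re-declares the same element with a from_env substitution plus a replace attribute. Before this patch, ConfigProcessor::mergeRecursive() dropped the replace attribute while merging the two files, so the preprocessor then saw an element carrying both a value and from_env without replace, and raised the error above. A rough shell reproduction follows; the file names, the max_thread_pool_size setting and the 10000/9000 values come from the new integration test, but the exact XML element nesting and attribute spelling are assumptions, not copied from it:

# sketch only: two overlapping config.d overrides that previously failed to merge
echo '<clickhouse><max_thread_pool_size>10000</max_thread_pool_size></clickhouse>' \
    > /etc/clickhouse-server/config.d/000-server_overrides.xml
echo '<clickhouse><max_thread_pool_size replace="replace" from_env="MAX_THREAD_POOL_SIZE">9000</max_thread_pool_size></clickhouse>' \
    > /etc/clickhouse-server/config.d/010-server_with_env_subst.xml
# before the fix, config preprocessing on the next start or SYSTEM RELOAD CONFIG hit the exception above
MAX_THREAD_POOL_SIZE=9000 clickhouse-server --config-file=/etc/clickhouse-server/config.xml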
Stack trace: Signed-off-by: Azat Khuzhin --- src/Common/Config/ConfigProcessor.cpp | 1 - .../configs/000-server_overrides.xml | 3 ++ ...subst.xml => 000-users_with_env_subst.xml} | 0 .../configs/010-server_with_env_subst.xml | 3 ++ .../test_config_substitutions/test.py | 30 ++++++++++++++++--- 5 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_config_substitutions/configs/000-server_overrides.xml rename tests/integration/test_config_substitutions/configs/{000-config_with_env_subst.xml => 000-users_with_env_subst.xml} (100%) create mode 100644 tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c9832e8efd5..67d6036aa51 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } diff --git a/tests/integration/test_config_substitutions/configs/000-server_overrides.xml b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml new file mode 100644 index 00000000000..9335f663d68 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml @@ -0,0 +1,3 @@ + + 10000 + diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml similarity index 100% rename from tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml rename to tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml diff --git a/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml new file mode 100644 index 00000000000..ea91f066a21 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index faceab6fbcd..124dbcaedf7 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -39,9 +39,13 @@ node6 = cluster.add_instance( node7 = cluster.add_instance( "node7", user_configs=[ - "configs/000-config_with_env_subst.xml", + "configs/000-users_with_env_subst.xml", "configs/010-env_subst_override.xml", ], + main_configs=[ + "configs/000-server_overrides.xml", + "configs/010-server_with_env_subst.xml", + ], env_variables={ # overridden with 424242 "MAX_QUERY_SIZE": "121212", @@ -126,9 +130,9 @@ def test_config(start_cluster): ) -def test_config_invalid_overrides(start_cluster): +def test_config_from_env_overrides(start_cluster): node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -156,7 +160,7 @@ def test_config_invalid_overrides(start_cluster): ): node7.query("SYSTEM RELOAD CONFIG") node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -181,6 +185,24 @@ def 
test_config_invalid_overrides(start_cluster): node7.query("SYSTEM RELOAD CONFIG") +def test_config_merge_from_env_overrides(start_cluster): + assert ( + node7.query( + "SELECT value FROM system.server_settings WHERE name='max_thread_pool_size'" + ) + == "10000\n" + ) + node7.replace_config( + "/etc/clickhouse-server/config.d/010-server_with_env_subst.xml", + """ + + 9000 + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") + + def test_include_config(start_cluster): # assert node4.query("select 1") From 6ccb26b1aa6c1d79e0388a72afd4b0f9edb2ea1c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:26:19 +0000 Subject: [PATCH 282/439] Switch to a virtual interface to get rid of static_pointer_cast --- src/Storages/Statistics/Statistics.cpp | 42 ++++++++++++++++----- src/Storages/Statistics/Statistics.h | 9 +++++ src/Storages/Statistics/StatisticsTDigest.h | 4 +- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- src/Storages/Statistics/StatisticsUniq.h | 2 +- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index c454adccc06..28e75c6d244 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -41,10 +41,35 @@ void ColumnStatistics::update(const ColumnPtr & column) stat.second->update(column); } +UInt64 IStatistics::estimateCardinality() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateEqual(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateLess(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); +} + +/// ------------------------------------- +/// Implementation of the estimation: +/// Note: Each statistics object supports certain types predicates natively, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. +/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics +/// object that supports it natively. + Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); - /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) - /// for every bucket. - if (statistics_uniq->getCardinality() < 2048) - { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); - return tdigest_static->estimateEqual(val); - } + /// 2048 is the default number of buckets in TDigest. 
In this case, TDigest stores exactly one value (with many rows) for every bucket. + if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) + return stats.at(StatisticsType::TDigest)->estimateEqual(val); } if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; @@ -72,6 +92,8 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const return rows * ConditionSelectivityEstimator::default_good_cond_factor; } +/// ------------------------------------- + void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); @@ -81,7 +103,7 @@ void ColumnStatistics::serialize(WriteBuffer & buf) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// store the column row count as it is always useful + /// as the column row count is always useful, save it in any case writeIntBinary(rows, buf); /// write the actual statistics object diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 4af7c423257..d4364075d1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -28,6 +28,15 @@ public: virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; + /// Estimate the cardinality of the column. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual UInt64 estimateCardinality() const; + + /// Per-value estimations. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column + virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column + protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index f391d0b17e6..d3a3bf115ee 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -16,8 +16,8 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + Float64 estimateLess(Float64 val) const override; + Float64 estimateEqual(Float64 val) const override; private: QuantileTDigest t_digest; diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 4e24e5f0e96..bf9a40ea8cb 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -44,7 +44,7 @@ void StatisticsUniq::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -UInt64 StatisticsUniq::getCardinality() +UInt64 StatisticsUniq::estimateCardinality() const { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 1c521fa9984..5290585bd94 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -18,7 +18,7 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - UInt64 getCardinality(); + UInt64 estimateCardinality() const override; private: std::unique_ptr arena; From 10d48afc20d23043c8e89bd804259b19247dc03c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 
2024 13:16:01 +0200 Subject: [PATCH 283/439] token_info is defined always --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3677f5b02ab..dedb4a9ddae 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -297,16 +297,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) String block_dedup_token; auto token_info = chunk.getChunkInfos().get(); - if constexpr (!async_insert) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - } + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); From fe43ea27d2414a8b52d3002dad392bea5ee0b028 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 13:37:36 +0200 Subject: [PATCH 284/439] Tests: fix timeouts handling in case of github timeout termination --- docker/test/fasttest/run.sh | 15 ++++++++++++++- docker/test/stateless/run.sh | 8 ++++++-- docker/test/stateless/utils.lib | 2 +- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4d5159cfa9e..2bed4c5c343 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -254,6 +254,19 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + function run_tests { clickhouse-server --version @@ -315,7 +328,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - run_tests + timeout_with_logging 35m bash -c run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..2cbc5304212 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,6 +6,9 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a +MAX_RUN_TIME=${MAX_RUN_TIME:-10800} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well @@ -262,14 +265,15 @@ function run_tests() export -f run_tests +TIMEOUT=$((${MAX_RUN_TIME} - 200)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. 
# So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 9b6ab535a90..833e1a05384 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -38,7 +38,7 @@ function fn_exists() { function timeout_with_logging() { local exit_code=0 - timeout "${@}" || exit_code="${?}" + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" if [[ "${exit_code}" -eq "124" ]] then From dd28947e827ff9264b45e0a8130b3b44a685c097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 13:41:39 +0200 Subject: [PATCH 285/439] Update Settings.h --- src/Core/Settings.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b45c1e38d1c..1ab67a8e5b7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -879,7 +879,10 @@ class IColumn; M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ \ - /** Experimental features */ \ + \ + /* ###################################### */ \ + /* ######## EXPERIMENTAL FEATURES ####### */ \ + /* ###################################### */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -948,7 +951,6 @@ class IColumn; /** End of experimental features */ - // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
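A note on the timeout_with_logging change in the test-timeout patch above: 'timeout -s TERM --preserve-status' lets the wrapped command receive a catchable SIGTERM and makes the wrapper report the command's own exit status instead of always returning 124. A standalone sketch of the exit-status difference (illustration only, not part of either script):

# with the default behaviour the shell sees 124 whenever the limit is hit
timeout 1 sleep 5; echo "default exit: $?"                      # prints 124
# with --preserve-status it sees how the child actually ended (128 + SIGTERM here)
timeout --preserve-status 1 sleep 5; echo "preserved exit: $?"  # prints 143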
From c08293026570e00f7f4332d7b0d4b3eb646db1d5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 13:43:27 +0200 Subject: [PATCH 286/439] rename to buildPreAndSinkChains --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +++--- src/Interpreters/InterpreterInsertQuery.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 5e8b8601f08..2becea61b3a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -397,7 +397,7 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) { chassert(presink_streams > 0); chassert(sink_streams > 0); @@ -612,7 +612,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & sink_streams_size = 1; } - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -673,7 +673,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query Chain chain; { - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( 1, 1, table, metadata_snapshot, query_sample_block); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index b06bb9a3db2..894c7c42144 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -79,7 +79,7 @@ private: std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); From b88be7260f6ce1eda9b949e1aa297eea5a2a110f Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 13:40:36 +0200 Subject: [PATCH 287/439] Tests: Eliminating the global tests queue to prevent clickhouse-test from hanging when a server dies --- tests/clickhouse-test | 121 +++++++++++------------------------------- 1 file changed, 30 insertions(+), 91 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..8e2a256fae2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -34,10 +34,8 @@ import urllib.parse # for crc32 import zlib from argparse import ArgumentParser -from contextlib import closing from datetime import datetime, timedelta from errno import ESRCH -from queue import Full from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union @@ -360,39 +358,6 @@ def clickhouse_execute_json( return 
rows -class Terminated(KeyboardInterrupt): - pass - - -def signal_handler(sig, frame): - raise Terminated(f"Terminated with {sig} signal") - - -def stop_tests(): - global stop_tests_triggered_lock - global stop_tests_triggered - global restarted_tests - - with stop_tests_triggered_lock: - print("Stopping tests") - if not stop_tests_triggered.is_set(): - stop_tests_triggered.set() - - # materialize multiprocessing.Manager().list() object before - # sending SIGTERM since this object is a proxy, that requires - # communicating with manager thread, but after SIGTERM will be - # send, this thread will die, and you will get - # ConnectionRefusedError error for any access to "restarted_tests" - # variable. - restarted_tests = [*restarted_tests] - - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) - - def get_db_engine(args, database_name): if args.replicated_database: return f" ON CLUSTER test_cluster_database_replicated \ @@ -2061,13 +2026,18 @@ class TestSuite: stop_time = None exit_code = None server_died = None -stop_tests_triggered_lock = None -stop_tests_triggered = None -queue = None multiprocessing_manager = None restarted_tests = None +class ServerDied(Exception): + pass + + +class GlobalTimeout(Exception): + pass + + def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): all_tests, num_tests, test_suite = all_tests_with_params global stop_time @@ -2122,24 +2092,17 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") while True: - if is_concurrent: - case = queue.get(timeout=args.timeout * 1.1) - if not case: - break + if all_tests: + case = all_tests.pop(0) else: - if all_tests: - case = all_tests.pop(0) - else: - break + break if server_died.is_set(): - stop_tests() - break + raise ServerDied("Server died") if stop_time and time() > stop_time: print("\nStop tests run because global time limit is exceeded.\n") - stop_tests() - break + raise GlobalTimeout("Stop tests run because global time limit is exceeded") test_case = TestCase(test_suite, case, args, is_concurrent) @@ -2182,18 +2145,15 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_chain += 1 if test_result.reason == FailureReason.SERVER_DIED: server_died.set() - stop_tests() elif test_result.status == TestStatus.SKIPPED: skipped_total += 1 except KeyboardInterrupt as e: print(colored("Break tests execution", args, "red")) - stop_tests() raise e if failures_chain >= args.max_failures_chain: - stop_tests() - break + raise ServerDied("Max failures chain") if failures_total > 0: print( @@ -2390,7 +2350,7 @@ def extract_key(key: str) -> str: )[1] -def do_run_tests(jobs, test_suite: TestSuite, parallel): +def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( "Found", @@ -2399,19 +2359,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): len(test_suite.sequential_tests), "sequential tests", ) - run_n, run_total = parallel.split("/") - run_n = float(run_n) - run_total = float(run_total) tests_n = len(test_suite.parallel_tests) - run_total = min(run_total, tests_n) - jobs = min(jobs, tests_n) - run_total = max(jobs, run_total) - - batch_size = max(1, 
len(test_suite.parallel_tests) // jobs) - parallel_tests_array = [] - for _ in range(jobs): - parallel_tests_array.append((None, batch_size, test_suite)) # If we don't do random shuffling then there will be always # nearly the same groups of test suites running concurrently. @@ -2424,25 +2373,21 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): # of failures will be nearly the same for all tests from the group. random.shuffle(test_suite.parallel_tests) + batch_size = max(1, len(test_suite.parallel_tests) // jobs) + parallel_tests_array = [] + for job in range(jobs): + range_ = job * batch_size, job * batch_size + batch_size + batch = test_suite.parallel_tests[range_[0] : range_[1]] + parallel_tests_array.append((batch, batch_size, test_suite)) + try: - with closing(multiprocessing.Pool(processes=jobs)) as pool: - pool.map_async(run_tests_array, parallel_tests_array) - - for suit in test_suite.parallel_tests: - queue.put(suit, timeout=args.timeout * 1.1) - - for _ in range(jobs): - queue.put(None, timeout=args.timeout * 1.1) - - queue.close() - except Full: - print( - "Couldn't put test to the queue within timeout. Server probably hung." - ) - print_stacktraces() - queue.close() - - pool.join() + with multiprocessing.Pool(processes=jobs) as pool: + future = pool.map_async(run_tests_array, parallel_tests_array) + future.wait() + finally: + pool.terminate() + pool.close() + pool.join() run_tests_array( (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite) @@ -2807,7 +2752,7 @@ def main(args): test_suite.cloud_skip_list = cloud_skip_list test_suite.private_skip_list = private_skip_list - total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) + total_tests_run += do_run_tests(args.jobs, test_suite) if server_died.is_set(): exit_code.value = 1 @@ -3268,9 +3213,6 @@ if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) server_died = multiprocessing.Event() - stop_tests_triggered_lock = multiprocessing.Lock() - stop_tests_triggered = multiprocessing.Event() - queue = multiprocessing.Queue(maxsize=1) multiprocessing_manager = multiprocessing.Manager() restarted_tests = multiprocessing_manager.list() @@ -3278,9 +3220,6 @@ if __name__ == "__main__": # infinite tests processes left # (new process group is required to avoid killing some parent processes) os.setpgid(0, 0) - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGHUP, signal_handler) try: args = parse_args() From 8777363670dcc8775037f7104e90eea05f0fa0b2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:45 +0200 Subject: [PATCH 288/439] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 94287dc4487..d6aff9e1370 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -142,7 +142,7 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with 
heshes from the parts. + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. // But if there is some table with different engine, we still need to define the source of the data in deduplication token // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; From 5c88d5b48ad75cbe3f8e15e428d8d24380c23943 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:55 +0200 Subject: [PATCH 289/439] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2becea61b3a..15b9b155d54 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -828,7 +828,13 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); + return std::make_unique( + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, + /* no_destination */false, + /* async_insert */false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } From aee1289f2d89b83d5f5255792fe73784ec824ca1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:04 +0200 Subject: [PATCH 290/439] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 15b9b155d54..2581a368272 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -791,12 +791,8 @@ BlockIO InterpreterInsertQuery::execute() res.pipeline.addStorageHolder(table); - StoragePtr inner_table; if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - - if (inner_table) - res.pipeline.addStorageHolder(inner_table); + res.pipeline.addStorageHolder(mv->getTargetTable()); return res; } From f0aa006461dd4118dada9c6262d53fc703d0af82 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:32 +0200 Subject: [PATCH 291/439] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2581a368272..f9b57f530f0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -580,7 +580,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.insert_deduplication_token.value.empty()) { - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const 
Block & in_header) -> ProcessorPtr { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); From c4207e9a6ef7c8ccd5e1c837535268e2f9f04b70 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:45 +0200 Subject: [PATCH 292/439] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f9b57f530f0..333da81ced0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -585,7 +585,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header); }); From 913e97b1a5560536bfcdc722812a53395370a435 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:05:06 +0200 Subject: [PATCH 293/439] work with review comments --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++++-- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 6 ++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 333da81ced0..d7f778f6678 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -674,7 +674,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query { auto [presink_chains, sink_chains] = buildPreAndSinkChains( - 1, 1, + /* presink_streams */1, /* sink_streams */1, table, metadata_snapshot, query_sample_block); chain = std::move(presink_chains.front()); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4a1163d2317..d8cfce1ca99 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,6 +95,8 @@ void MergeTreeSink::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); + const bool need_to_define_dedup_token = !token_info->isDefined(); + String block_dedup_token; if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -123,7 +125,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -166,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index dedb4a9ddae..bbae054fbed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -302,6 +302,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", 
storage.getStorageID().getNameForLogs()); + const bool need_to_define_dedup_token = !token_info->isDefined(); + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -368,7 +370,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -419,7 +421,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } From 6d3d33638ac45b8d7e6fd2d788335a40539548e8 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:22:33 +0200 Subject: [PATCH 294/439] Fix lock-order-inversion in DatabaseCatalog --- src/Interpreters/DatabaseCatalog.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..841decf29c5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -274,10 +274,12 @@ void DatabaseCatalog::shutdownImpl() database->shutdown(); } + TablesMarkedAsDropped tables_marked_dropped_to_destroy; { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.clear(); + tables_marked_dropped.swap(tables_marked_dropped_to_destroy); } + tables_marked_dropped_to_destroy.clear(); std::lock_guard lock(databases_mutex); for (const auto & db : databases) From 2a1c13b070fbb3ad38ad8820f004b8687dab9425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 14:25:31 +0200 Subject: [PATCH 295/439] Add comment about the changes --- base/base/itoa.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index c17a2bfd999..60231507c96 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -119,6 +119,13 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) if (n < U(1e2)) { + /// This is changed from the original jeaiii implementation + /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions + /// and a memory access (no need to read digits.fd[n]) + /// This is not true for pure random numbers, but that's not the common use case of a database + /// Original jeaii code + // *reinterpret_cast(b) = digits.fd[n]; + // return n < 10 ? b + 1 : b + 2; return n < 10 ? 
outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) From 3a09000e4448c921cc9faefd387ef8b383a89c1a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:33:21 +0200 Subject: [PATCH 296/439] remove trailing whitespaces --- src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d7f778f6678..2cbfc55d008 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -825,10 +825,10 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) auto create_fn = [] (const InterpreterFactory::Arguments & args) { return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, /* no_destination */false, /* async_insert */false); }; From 4e6bdb15b0c58bd6d3457f21b2f9a493b698904e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 14:35:17 +0200 Subject: [PATCH 297/439] Azure policy --- docker/test/stress/run.sh | 40 +++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 96f8ecb2fab..323944591b1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -110,6 +110,15 @@ start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="s3_cache" +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="azure_cache" +else + TEMP_POLICY="default" +fi + + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, @@ -135,7 +144,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, @@ -161,7 +170,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum 
UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, @@ -195,7 +204,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -216,13 +225,24 @@ export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
s3
|
s3
default|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp -mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
s3
|
s3
default|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \ + | sed "s|
azure
|
azure
default|" \ + > /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml +fi + sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ | sed "s|trace|test|" \ From 97e2c8c7d220aa073d3bbbe0f1a9624ab28a2076 Mon Sep 17 00:00:00 2001 From: Mikhail Gorshkov Date: Wed, 3 Jul 2024 12:56:20 +0000 Subject: [PATCH 298/439] PR review follow-up --- src/Functions/FunctionsRound.h | 88 ++-------------------------------- 1 file changed, 4 insertions(+), 84 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index d43f7f264b4..357b8c03044 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -586,62 +586,8 @@ struct Dispatcher const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_res = col_res->getData(); - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - vec_res.resize(1); - auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - if (scale_arg == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else if (scale_arg > 0) - { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - } - /// Non-const scale argument: - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale_data = scale_col_typed->getData(); - const size_t rows = scale_data.size(); - - vec_res.resize(rows); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale_data[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - if (raw_scale == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else if (raw_scale > 0) - { - size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else - { - size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - } - } - return col_res; + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } @@ -697,34 +643,8 @@ public: const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto col_res = ColumnDecimal::create(1, col->getScale()); - auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); - return col_res; - } - /// Non-const scale argument: - if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale = scale_col_typed->getData(); - const size_t rows = scale.size(); - auto col_res = ColumnDecimal::create(rows, col->getScale()); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - DecimalRoundingImpl::applyOne(value_data, col->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); - } - return col_res; - } + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } From 88601ae86914ea152fc0ae8e5fd74fe030598b18 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jul 2024 15:25:12 +0200 Subject: [PATCH 299/439] avoid conflicts in SettingsChangesHistory --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index 56d6fecf4b8..dd94a48f8e7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ contrib/* linguist-vendored *.h linguist-language=C++ tests/queries/0_stateless/data_json/* binary tests/queries/0_stateless/*.reference -crlf +src/Core/SettingsChangesHistory.cpp merge=union From 5fd36059e48c4377ea526343e7272009d2ccaf2a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 15:28:01 +0200 Subject: [PATCH 300/439] Try disabling background threads --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..cc5a391676f 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() From 1c14a458e72bc9554e851c20ffff9bfa03e5446e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:11:02 +0200 Subject: [PATCH 301/439] Add profile events for regex cache --- src/Common/ProfileEvents.cpp | 4 ++++ src/Functions/Regexps.h | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..cd5f67fdff2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,6 +239,10 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpCreated, "Compiled regular expressions. 
Identical regular expressions compiled just once and cached forever.") \ + M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ + M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index b6bd463212f..fff21fdb941 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -24,6 +24,10 @@ namespace ProfileEvents { extern const Event RegexpCreated; +extern const Event RegexpGlobalCacheHit; +extern const Event RegexpGlobalCacheMiss; +extern const Event RegexpLocalCacheHit; +extern const Event RegexpLocalCacheMiss; } @@ -72,18 +76,28 @@ public: Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE]; if (bucket.regexp == nullptr) [[unlikely]] + { /// insert new entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } else + { if (pattern != bucket.pattern) + { + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } + else + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit); + } return bucket.regexp; } private: - constexpr static size_t CACHE_SIZE = 100; /// collision probability + constexpr static size_t CACHE_SIZE = 1000; /// collision probability std::hash hasher; struct Bucket @@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else + { if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance) { /// replace existing entry @@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } + else + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + } return bucket.regexps; } From c96e3c6d1a9edb1e0d22cf818994f7647fa4a9d2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 16:21:36 +0200 Subject: [PATCH 302/439] Fix which I don't understand --- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 11 +++++++++-- ...3198_non_adaptive_granularity_no_errors.reference | 2 ++ .../03198_non_adaptive_granularity_no_errors.sql | 12 ++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference create mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9666f310d3d..a69d21de8e7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -560,7 +560,7 
@@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai { /// With fixed granularity we can have last mark with less rows than granularity const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); - if (!data_part->index_granularity_info.fixed_index_granularity || !is_last_mark) + if (!index_granularity_info.fixed_index_granularity || !is_last_mark) throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" @@ -785,7 +785,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// We can adjust marks only if we computed granularity for blocks. /// Otherwise we cannot change granularity because it will differ from /// other columns -// if (compute_granularity && settings.can_use_adaptive_granularity) + if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -824,7 +824,14 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// Without offset rows_written_in_last_mark = 0; } + + if (compute_granularity) + { + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } } + } } diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference new file mode 100644 index 00000000000..fcd78da1283 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference @@ -0,0 +1,2 @@ +1000000 +1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql new file mode 100644 index 00000000000..25798ef6d33 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS data_02051__fuzz_24; + +CREATE TABLE data_02051__fuzz_24 (`key` Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +detach table data_02051__fuzz_24; +attach table data_02051__fuzz_24; + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +DROP TABLE data_02051__fuzz_24; From 2e5acb2bdd929928cc9e47ff7038a1775f0ab463 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 16:33:58 +0200 Subject: [PATCH 303/439] Tests: replace openssl base64 with base64 --- .../0_stateless/03167_base64_url_functions_sh.sh | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh index 12eea7462df..ec3170b165c 100755 --- a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -119,7 +119,7 @@ urls=( base64URLEncode() { - echo -n "$1" | openssl base64 -e -A | tr '+/' '-_' | tr -d '=' + echo -n "$1" | base64 -w0 | tr '+/' '-_' | tr -d '=' } base64URLDecode() { @@ -128,7 +128,7 @@ base64URLDecode() { if [ $len -eq 2 ]; then result="$1"'==' elif [ $len -eq 3 ]; then result="$1"'=' fi - echo "$result" | tr '_-' '/+' | openssl base64 -d -A + echo "$result" 
| tr '_-' '/+' | base64 -w0 -d } test_compare_to_gold_encode() { @@ -158,12 +158,10 @@ test_compare_to_gold_decode() { test_compare_to_self() { local input="$1" - local encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") - local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode')") + local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('$input'))") if [ "$decode" != "$input" ]; then echo "Input: $input" - echo "Encode: $encode" echo "Got: $decode" fi } @@ -181,10 +179,8 @@ for url in "${urls[@]}"; do done # special case for ' -encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('http://example.com/!$&\'()*+,;=:@/path')") -decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode')") +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('http://example.com/!$&\'()*+,;=:@/path'))") if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then echo "Special case fail" - echo "Encode: $encode" echo "Got: $decode" fi From 8319d2579789aee45a8e02cac0131e7dc348eedd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 14:43:47 +0000 Subject: [PATCH 304/439] Minor updates --- src/Common/ProfileEvents.cpp | 11 ++++++----- src/Functions/Regexps.h | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index cd5f67fdff2..2e3984f8f10 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -238,11 +238,12 @@ \ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ - M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \ - M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ - M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ - M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ - M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ + M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ + M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ + \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. 
This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index fff21fdb941..b317d786fab 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -23,11 +23,11 @@ namespace ProfileEvents { -extern const Event RegexpCreated; -extern const Event RegexpGlobalCacheHit; -extern const Event RegexpGlobalCacheMiss; -extern const Event RegexpLocalCacheHit; -extern const Event RegexpLocalCacheMiss; + extern const Event RegexpWithMultipleNeedlesCreated; + extern const Event RegexpWithMultipleNeedlesGlobalCacheHit; + extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss; + extern const Event RegexpLocalCacheHit; + extern const Event RegexpLocalCacheMiss; } @@ -85,8 +85,8 @@ public: { if (pattern != bucket.pattern) { - ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; } else @@ -97,7 +97,7 @@ public: } private: - constexpr static size_t CACHE_SIZE = 1000; /// collision probability + constexpr static size_t CACHE_SIZE = 1'000; /// collision probability std::hash hasher; struct Bucket @@ -258,7 +258,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } - ProfileEvents::increment(ProfileEvents::RegexpCreated); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated); /// We allocate the scratch space only once, then copy it across multiple threads with hs_clone_scratch /// function which is faster than allocating scratch space each time in each thread. 
@@ -336,7 +336,7 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else @@ -349,11 +349,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit); } return bucket.regexps; From 07f51e02eda1c8194da28317e4d8452a5c52fc40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:54:09 +0200 Subject: [PATCH 305/439] Reuse some checks --- programs/CMakeLists.txt | 4 +- programs/keeper/keeper_main.cpp | 266 +----------------------------- programs/main.cpp | 268 +------------------------------ src/Common/Coverage.cpp | 45 ++++++ src/Common/Coverage.h | 5 + src/Common/EnvironmentChecks.cpp | 234 +++++++++++++++++++++++++++ src/Common/EnvironmentChecks.h | 5 + 7 files changed, 297 insertions(+), 530 deletions(-) create mode 100644 src/Common/Coverage.cpp create mode 100644 src/Common/Coverage.h create mode 100644 src/Common/EnvironmentChecks.cpp create mode 100644 src/Common/EnvironmentChecks.h diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index b06290ae352..6b3a0b16624 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -73,9 +73,9 @@ else() endif() if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") + message(STATUS "ClickHouse Keeper: ON") else() - message(STATUS "ClickHouse keeper mode: OFF") + message(STATUS "ClickHouse Keeper: OFF") endif() if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index ec9b84ce94b..a240f9699f2 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -1,11 +1,9 @@ -#include #include #include #include #include #include -#include #include #include /// pair @@ -14,6 +12,9 @@ #include "config.h" #include "config_tools.h" +#include +#include + #include #include #include @@ -59,270 +60,9 @@ int printHelp(int, char **) return -1; } - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } } -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. 
-/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). 
- "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. 
- std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - -} - bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) diff --git a/programs/main.cpp b/programs/main.cpp index 61e2bc18ed7..eecbe3a6876 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include @@ -7,7 +5,6 @@ #include #include #include -#include #include #include /// pair @@ -16,6 +13,9 @@ #include "config.h" #include "config_tools.h" + +#include +#include #include #include #include @@ -119,268 +119,6 @@ std::pair clickhouse_short_names[] = {"chc", "client"}, }; - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } -} - - -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. 
-void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). 
- "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp new file mode 100644 index 00000000000..fa8da1f9e15 --- /dev/null +++ b/src/Common/Coverage.cpp @@ -0,0 +1,45 @@ +#include + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . 
+ + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. + + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + diff --git a/src/Common/Coverage.h b/src/Common/Coverage.h new file mode 100644 index 00000000000..aa6dd2825ed --- /dev/null +++ b/src/Common/Coverage.h @@ -0,0 +1,5 @@ +#pragma once + +#if defined(SANITIZE_COVERAGE) +void dumpCoverage(); +#endif diff --git a/src/Common/EnvironmentChecks.cpp b/src/Common/EnvironmentChecks.cpp new file mode 100644 index 00000000000..d69e8cbaa3d --- /dev/null +++ b/src/Common/EnvironmentChecks.cpp @@ -0,0 +1,234 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace +{ + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. 
+void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + +} + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). 
+ "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif diff --git a/src/Common/EnvironmentChecks.h b/src/Common/EnvironmentChecks.h new file mode 100644 index 00000000000..6d355a69ff9 --- /dev/null +++ b/src/Common/EnvironmentChecks.h @@ -0,0 +1,5 @@ +#pragma once + +#if !defined(USE_MUSL) +void checkHarmfulEnvironmentVariables(char ** argv); +#endif From 39a371b27de3f9f1ee94f6da0ce1b78ab527d6f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:07:31 +0000 Subject: [PATCH 306/439] Bump vectorscan --- contrib/vectorscan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/vectorscan b/contrib/vectorscan index 38431d11178..4918f81ea3d 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 38431d111781843741a781a57a6381a527d900a4 +Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 From 9a023744a5825d349c1027e1c1d425956a3bc87f Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 11:13:39 -0400 Subject: [PATCH 307/439] fix build --- src/Storages/StorageFuzzQuery.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 229ae1af7c1..6e8f425f8dc 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -1,14 +1,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include @@ -41,10 +39,8 @@ ColumnPtr FuzzQuerySource::createColumn() fuzzer.fuzzMain(new_query); auto fuzzed_text = new_query->formatForErrorMessage(); - WriteBufferFromOwnString out; - formatAST(*new_query, out, false); - auto data = out.str(); - size_t data_len = data.size(); + if (base_before_fuzz == fuzzed_text) + continue; /// AST is too long, will start from the original query. 
if (config.max_query_length > 500) @@ -53,12 +49,12 @@ ColumnPtr FuzzQuerySource::createColumn() continue; } - IColumn::Offset next_offset = offset + data_len + 1; + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; data_to.resize(next_offset); - std::copy(data.begin(), data.end(), &data_to[offset]); + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); - data_to[offset + data_len] = 0; + data_to[offset + fuzzed_text.size()] = 0; offsets_to[row_num] = next_offset; offset = next_offset; From ee3c530817d7cf76e27ed61c1b2be532acf3b32c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:40:41 +0000 Subject: [PATCH 308/439] Remove obsolete comment --- src/Functions/generateUUIDv7.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..b1807a3fd35 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -11,20 +11,6 @@ namespace /* Bit layouts of UUIDv7 -without counter: - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - -with counter: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ From af845234a2666dbfe16e6877a6c6d111e26c2f44 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 17:53:05 +0200 Subject: [PATCH 309/439] Review fixes --- docker/test/stateless/run.sh | 4 +++- tests/clickhouse-test | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 2cbc5304212..96436beaf0c 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -265,7 +265,9 @@ function run_tests() export -f run_tests -TIMEOUT=$((${MAX_RUN_TIME} - 200)) + +# This should be enough to setup job and collect artifacts +TIMEOUT=$((MAX_RUN_TIME - 300)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8e2a256fae2..c2acdb715ea 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -358,6 +358,14 @@ def clickhouse_execute_json( return rows +def stop_tests(): + # send signal to all processes in group to avoid hung check triggering + # (to avoid terminating clickhouse-test itself, the signal should be ignored) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) + signal.signal(signal.SIGTERM, signal.SIG_DFL) + + def get_db_engine(args, database_name): if args.replicated_database: return f" ON CLUSTER test_cluster_database_replicated \ @@ -2098,10 +2106,12 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): break if server_died.is_set(): + stop_tests() raise ServerDied("Server died") if stop_time and time() > stop_time: print("\nStop tests run because global time limit is exceeded.\n") + stop_tests() raise GlobalTimeout("Stop tests run because global time limit is exceeded") test_case = TestCase(test_suite, case, args, is_concurrent) @@ -2150,9 +2160,11 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): except KeyboardInterrupt as e: print(colored("Break tests execution", args, "red")) + stop_tests() raise e if failures_chain >= args.max_failures_chain: + stop_tests() raise ServerDied("Max failures chain") if failures_total > 0: From 6aa2f7d5a0997fd56432f4550b3353e5d7cc6898 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 17:02:00 +0100 Subject: [PATCH 310/439] adjust reference file --- .../0_stateless/02982_aggregation_states_destruction.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 From e8701dc4e4b1893dbe507d5180f8367a6d4b7689 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 18:16:26 +0200 Subject: [PATCH 311/439] Fix shutdown in GRPCServer. --- src/Server/GRPCServer.cpp | 102 ++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 10b59751b22..cb36df1efc0 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1735,10 +1735,19 @@ namespace class GRPCServer::Runner { public: - explicit Runner(GRPCServer & owner_) : owner(owner_) {} + explicit Runner(GRPCServer & owner_) : owner(owner_), log(owner.log) {} ~Runner() { + try + { + stop(); + } + catch (...) + { + tryLogCurrentException(log, "~Runner"); + } + if (queue_thread.joinable()) queue_thread.join(); } @@ -1756,13 +1765,27 @@ public: } catch (...) { - tryLogCurrentException("GRPCServer"); + tryLogCurrentException(log, "run"); } }; queue_thread = ThreadFromGlobalPool{runner_function}; } - void stop() { stopReceivingNewCalls(); } + void stop() + { + std::lock_guard lock{mutex}; + should_stop = true; + + if (current_calls.empty()) + { + /// If there are no current calls then we call shutdownQueue() to signal the queue to stop waiting for next events. + /// The following line will make CompletionQueue::Next() stop waiting if the queue is empty and return false instead. 
+ shutdownQueue(); + + /// If there are some current calls then we can't call shutdownQueue() right now because we want to let the current calls finish. + /// In this case function shutdownQueue() will be called later in run(). + } + } size_t getNumCurrentCalls() const { @@ -1789,12 +1812,6 @@ private: [this, call_type](bool ok) { onNewCall(call_type, ok); }); } - void stopReceivingNewCalls() - { - std::lock_guard lock{mutex}; - should_stop = true; - } - void onNewCall(CallType call_type, bool responder_started_ok) { std::lock_guard lock{mutex}; @@ -1827,38 +1844,47 @@ private: void run() { setThreadName("GRPCServerQueue"); - while (true) + + bool ok = false; + void * tag = nullptr; + + while (owner.queue->Next(&tag, &ok)) { - { - std::lock_guard lock{mutex}; - finished_calls.clear(); /// Destroy finished calls. - - /// If (should_stop == true) we continue processing until there is no active calls. - if (should_stop && current_calls.empty()) - { - bool all_responders_gone = std::all_of( - responders_for_new_calls.begin(), responders_for_new_calls.end(), - [](std::unique_ptr & responder) { return !responder; }); - if (all_responders_gone) - break; - } - } - - bool ok = false; - void * tag = nullptr; - if (!owner.queue->Next(&tag, &ok)) - { - /// Queue shutted down. - break; - } - auto & callback = *static_cast(tag); callback(ok); + + std::lock_guard lock{mutex}; + finished_calls.clear(); /// Destroy finished calls. + + /// If (should_stop == true) we continue processing while there are current calls. + if (should_stop && current_calls.empty()) + shutdownQueue(); } + + /// CompletionQueue::Next() returns false if the queue is fully drained and shut down. + } + + /// Shutdown the queue if that isn't done yet. + void shutdownQueue() + { + chassert(should_stop); + if (queue_is_shut_down) + return; + + queue_is_shut_down = true; + + /// Server should be shut down before CompletionQueue. + if (owner.grpc_server) + owner.grpc_server->Shutdown(); + + if (owner.queue) + owner.queue->Shutdown(); } GRPCServer & owner; + LoggerRawPtr log; ThreadFromGlobalPool queue_thread; + bool queue_is_shut_down = false; std::vector> responders_for_new_calls; std::map> current_calls; std::vector> finished_calls; @@ -1876,16 +1902,6 @@ GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & addr GRPCServer::~GRPCServer() { - /// Server should be shutdown before CompletionQueue. - if (grpc_server) - grpc_server->Shutdown(); - - /// Completion Queue should be shutdown before destroying the runner, - /// because the runner is now probably executing CompletionQueue::Next() on queue_thread - /// which is blocked until an event is available or the queue is shutting down. 
- if (queue) - queue->Shutdown(); - runner.reset(); } From 8b14754005b52bea1422021aac0ed774f82a7946 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 16:29:47 +0000 Subject: [PATCH 312/439] Fix ARM build (upgrade sysroot) --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index 39c4713334f..cc385041b22 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c +Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 From d0e3a6906015b34290d3ab3fdd0ab67716f55e29 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:41:16 +0200 Subject: [PATCH 313/439] make 03008_deduplication_mv_generates_several_blocks_nonreplicated thinner --- .../0_stateless/03008_deduplication.python | 14 +- ...tes_several_blocks_nonreplicated.reference | 256 +++++++++--------- 2 files changed, 135 insertions(+), 135 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 89dbea97667..dd1058518c9 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -390,14 +390,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != 5 ) FROM table_a_b; - SELECT throwIf( count() != 47 ) + SELECT throwIf( count() != 9 ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) FROM table_when_b_even_and_joined; """ else: @@ -406,14 +406,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ else: @@ -421,14 +421,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ @@ -451,7 +451,7 @@ def test_mv_generates_several_blocks(parser): ORDER BY (a_join, b); INSERT INTO table_for_join_with SELECT 'joined_' || toString(number), number - FROM numbers(9); + FROM numbers(1); {details_print_for_table_for_join_with} {create_table_a_b_statement} diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference 
b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference index 76ef4cf6b2c..6e76ec46aa8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ 
-198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False 
table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 
table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: 
insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From 5875694669acc70b07ea5422902f6a6549f4f62b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:48:32 +0200 Subject: [PATCH 314/439] Fix includes --- programs/main.cpp | 1 - src/Common/Coverage.cpp | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index eecbe3a6876..02ea1471108 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,7 +13,6 @@ #include "config.h" #include "config_tools.h" - #include #include #include diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp index fa8da1f9e15..a21efe62fb6 100644 --- a/src/Common/Coverage.cpp +++ b/src/Common/Coverage.cpp @@ -1,6 +1,26 @@ #include #if defined(SANITIZE_COVERAGE) + +#include +#include + +#include +#include + +#include +#include + +#include + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. 
+#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + __attribute__((no_sanitize("coverage"))) void dumpCoverage() { From 045cb0a5819e6e41198cd9cb05e27fb04aa08269 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 12:52:25 +0200 Subject: [PATCH 315/439] Increase special allocation sampling --- src/Common/GWPAsan.cpp | 2 +- src/Core/ServerSettings.h | 2 +- src/IO/BufferWithOwnMemory.h | 15 +++++++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 0482ddb4e2b..48fbd07ec34 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 8000; + opts.SampleRate = 10000; const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 68ac45fa24f..e70be61118a 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.001, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 5c9a69893df..0ec733f7840 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -4,12 +4,15 @@ #include #include +#include #include #include #include +#include "config.h" + namespace ProfileEvents { @@ -41,10 +44,13 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; + [[maybe_unused]] bool allow_gwp_asan_force_sample; + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. 
- explicit Memory(size_t size_, size_t alignment_ = 0) : alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0, bool allow_gwp_asan_force_sample_ = false) + : alignment(alignment_), allow_gwp_asan_force_sample(allow_gwp_asan_force_sample_) { alloc(size_); } @@ -127,6 +133,11 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, new_capacity); +#if USE_GWP_ASAN + if (unlikely(allow_gwp_asan_force_sample && GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = new_size; @@ -154,7 +165,7 @@ protected: public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) + : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment, /*allow_gwp_asan_force_sample_=*/true) { Base::set(existing_memory ? existing_memory : memory.data(), size); Base::padded = !existing_memory; From 04eb07b453ced85f4dfeb0b12e16777e22055156 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 12:55:16 -0400 Subject: [PATCH 316/439] Update startup-scripts.md --- docs/en/operations/startup-scripts.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md index c7842c1472b..91aa4772bcf 100644 --- a/docs/en/operations/startup-scripts.md +++ b/docs/en/operations/startup-scripts.md @@ -1,6 +1,5 @@ --- -slug: /en/operations/startup-scripts.md -sidebar_position: 70 +slug: /en/operations/startup-scripts sidebar_label: Startup Scripts --- From 438fd899236b15468828c3dec751081fd07325d6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:59:07 +0200 Subject: [PATCH 317/439] adjust 03008_deduplication_mv_generates_several_blocks_replicated --- ...erates_several_blocks_replicated.reference | 256 +++++++++--------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference index a84539df16b..a25e8713c61 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree 
use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 
table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ 
Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t 
table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 
table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From d688d4114c6ac915aeb584587b985006a39c9f15 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:16:45 +0100 Subject: [PATCH 318/439] Rename events --- src/Common/ProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 2e3984f8f10..acd29a91450 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,8 +239,8 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ - M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ - M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ \ From 2ec0a9cfeaf850c31f1567da1884ad77fae0bcdd Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 19:25:44 +0200 Subject: [PATCH 319/439] Fix test test_grpc_protocol/test.py::test_progress --- tests/integration/test_grpc_protocol/test.py | 60 ++++++++------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1e4ae7f0288 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -369,47 +369,33 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) - results = list(results) - for result in results: - result.time_zone = "" - result.query_id = "" - # print(results) - # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string - # because str() can serialize a protobuf message with any 
order of fields. - expected_results = [ - clickhouse_grpc_pb2.Result( - output_format="TabSeparated", - progress=clickhouse_grpc_pb2.Progress( - read_rows=2, read_bytes=16, total_rows_to_read=8 - ), - ), - clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), - clickhouse_grpc_pb2.Result( - stats=clickhouse_grpc_pb2.Stats( - rows=8, - blocks=4, - allocated_bytes=1092, - ) - ), + # Note: We can't compare results using a statement like `assert results == expected_results` + # because `results` can come in slightly different order. + # So we compare `outputs` and `progresses` separately and not `results` as a whole. + + outputs = [i.output for i in results if i.output] + progresses = [i.progress for i in results if i.HasField("progress")] + + # print(outputs) + # print(progresses) + + expected_outputs = [ + b"0\t0\n1\t0\n", + b"2\t0\n3\t0\n", + b"4\t0\n5\t0\n", + b"6\t0\n7\t0\n", ] - # Stats data can be returned, which broke the test - results = [i for i in results if not isinstance(i, clickhouse_grpc_pb2.Stats)] + expected_progresses = [ + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16, total_rows_to_read=8), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + ] - assert results == expected_results + assert outputs == expected_outputs + assert progresses == expected_progresses def test_session_settings(): From 9737c5bab4779708e6a51bbb6739d8da34fc87bd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 17:33:45 +0000 Subject: [PATCH 320/439] Probably fix tsan assert in test_mysql_killed_while_insert_8_0 --- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index 5d81fa7068f..ee2bb8513b2 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c +Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 From a19eb8686492baee1084e44217f3e6c2f8594d54 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 19:39:00 +0200 Subject: [PATCH 321/439] Review fixes 2 --- docker/test/fasttest/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 2bed4c5c343..c80ea193010 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -305,6 +305,8 @@ function run_tests clickhouse stop --pid-path "$FASTTEST_DATA" } +export -f run_tests + case "$stage" in "") ls -la From 87dda31a2c234a7596b661ad5d63f74f3124ea25 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 20:36:19 +0200 Subject: [PATCH 322/439] Add test for GRPCServer's shutdown. 
--- tests/integration/test_grpc_protocol/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1ace5b361b8 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -39,6 +39,7 @@ node = cluster.add_instance( "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, ipv6_address=IPV6_ADDRESS, + stay_alive=True, ) main_channel = None @@ -763,3 +764,9 @@ def test_opentelemetry_context_propagation(): ) == "SELECT 1\tsome custom state\n" ) + + +def test_restart(): + assert query("SELECT 1") == "1\n" + node.restart_clickhouse() + assert query("SELECT 2") == "2\n" From 877445c88d71caffc79e6e7cd956298e84e9867e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 19:21:12 +0000 Subject: [PATCH 323/439] Fix reading dynamic subcolumns from altered Memory table --- src/Interpreters/getColumnFromBlock.cpp | 30 +++++++++++++++++++ src/Interpreters/getColumnFromBlock.h | 1 + .../QueryPlan/ReadFromMemoryStorageStep.cpp | 20 +++++++++---- ...3200_memory_engine_alter_dynamic.reference | 10 +++++++ .../03200_memory_engine_alter_dynamic.sql | 7 +++++ 5 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If requested subcolumn is dynamic, we should first perform cast and then + /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. 
+ if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); 
assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..95823283812 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=1; + From 31ca631ac583b88b1ef7e083e03d53712a87ada8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 21:58:18 +0200 Subject: [PATCH 324/439] Pin all python packages in CI images --- docker/reqgenerator.py | 42 +++++++ docker/test/fasttest/Dockerfile | 6 +- docker/test/fasttest/requirements.txt | 41 +++++++ docker/test/fuzzer/Dockerfile | 3 +- docker/test/fuzzer/requirements.txt | 27 +++++ docker/test/integration/base/Dockerfile | 3 +- docker/test/integration/base/requirements.txt | 26 ++++ docker/test/integration/resolver/Dockerfile | 3 +- .../integration/resolver/requirements.txt | 6 + docker/test/integration/runner/Dockerfile | 46 +------ .../test/integration/runner/requirements.txt | 113 ++++++++++++++++++ docker/test/libfuzzer/Dockerfile | 4 +- docker/test/libfuzzer/requirements.txt | 27 +++++ docker/test/performance-comparison/Dockerfile | 5 +- .../performance-comparison/requirements.txt | 32 +++++ docker/test/sqllogic/Dockerfile | 7 +- docker/test/sqllogic/requirements.txt | 30 +++++ docker/test/sqltest/Dockerfile | 5 +- docker/test/sqltest/requirements.txt | 29 +++++ docker/test/stateful/Dockerfile | 1 - docker/test/stateless/Dockerfile | 6 +- docker/test/stateless/requirements.txt | 51 ++++++++ docker/test/style/Dockerfile | 18 +-- docker/test/style/requirements.txt | 58 +++++++++ 24 files changed, 506 insertions(+), 83 deletions(-) create mode 100644 docker/reqgenerator.py create mode 100644 docker/test/fasttest/requirements.txt create mode 100644 docker/test/fuzzer/requirements.txt create mode 100644 docker/test/integration/base/requirements.txt create mode 100644 docker/test/integration/resolver/requirements.txt create mode 100644 docker/test/integration/runner/requirements.txt create mode 100644 docker/test/libfuzzer/requirements.txt create mode 100644 docker/test/performance-comparison/requirements.txt create mode 100644 docker/test/sqllogic/requirements.txt create mode 100644 docker/test/sqltest/requirements.txt create mode 100644 docker/test/stateless/requirements.txt create mode 100644 docker/test/style/requirements.txt 
diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py new file mode 100644 index 00000000000..89b901413d6 --- /dev/null +++ b/docker/reqgenerator.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# To run this script you must install docker and piddeptree python package +# + +import subprocess +import os +import sys + +def build_docker_deps(image_name, imagedir): + cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" + subprocess.check_call(cmd, shell=True) + +def check_docker_file_install_with_pip(filepath): + image_name = None + with open(filepath, 'r') as f: + for line in f: + if 'docker build' in line: + arr = line.split(' ') + if len(arr) > 4: + image_name = arr[4] + if 'pip3 install' in line or 'pip install' in line: + return image_name, True + return image_name, False + +def process_affected_images(images_dir): + for root, _dirs, files in os.walk(images_dir): + for f in files: + if f == "Dockerfile": + docker_file_path = os.path.join(root, f) + print("Checking image on path", docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip(docker_file_path) + if has_pip: + print("Find pip in", image_name) + try: + build_docker_deps(image_name, root) + except Exception as ex: + print(ex) + else: + print("Pip not found in", docker_file_path) + + +process_affected_images(sys.argv[1]) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e0be261d5e8..5f92db5a3ee 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,10 +19,7 @@ RUN apt-get update \ odbcinst \ psmisc \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ unixodbc \ pv \ jq \ @@ -31,7 +28,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld diff --git a/docker/test/fasttest/requirements.txt b/docker/test/fasttest/requirements.txt new file mode 100644 index 00000000000..993ea22e5ae --- /dev/null +++ b/docker/test/fasttest/requirements.txt @@ -0,0 +1,41 @@ +Jinja2==3.1.3 +MarkupSafe==2.1.5 +PyJWT==2.3.0 +PyYAML==6.0.1 +Pygments==2.11.2 +SecretStorage==3.3.1 +blinker==1.4 +certifi==2020.6.20 +chardet==4.0.0 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +requests==2.32.3 +scipy==1.12.0 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index d3f78ac1d95..e1fb09b8ed5 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -31,7 +31,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install 
--no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/fuzzer/requirements.txt b/docker/test/fuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/fuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 270b40e23a6..469251f648c 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -33,7 +33,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install pycurl +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/integration/base/requirements.txt b/docker/test/integration/base/requirements.txt new file mode 100644 index 00000000000..d195d8deaf6 --- /dev/null +++ b/docker/test/integration/base/requirements.txt @@ -0,0 +1,26 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +pycurl==7.45.3 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index 01b9b777614..b35a7262651 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -2,4 +2,5 @@ # Helper docker container to run python bottle apps FROM python:3 -RUN python -m pip install bottle +COPY requirements.txt / +RUN python -m pip install --no-cache-dir -r requirements.txt diff --git a/docker/test/integration/resolver/requirements.txt b/docker/test/integration/resolver/requirements.txt new file mode 100644 index 00000000000..fbf85295329 --- /dev/null +++ b/docker/test/integration/resolver/requirements.txt @@ -0,0 +1,6 @@ +bottle==0.12.25 +packaging==24.1 +pip==23.2.1 +pipdeptree==2.23.0 +setuptools==69.0.3 +wheel==0.42.0 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 23d8a37d822..d250b746e7d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update \ libicu-dev \ bsdutils \ curl \ - python3-pika \ liblua5.1-dev \ luajit \ libssl-dev \ @@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ # kazoo 2.10.0 is broken # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html -RUN python3 -m pip install --no-cache-dir \ - PyMySQL==1.1.0 \ - asyncio==3.4.3 \ - avro==1.10.2 \ - 
azure-storage-blob==12.19.0 \ - boto3==1.34.24 \ - cassandra-driver==3.29.0 \ - confluent-kafka==2.3.0 \ - delta-spark==2.3.0 \ - dict2xml==1.7.4 \ - dicttoxml==1.7.16 \ - docker==6.1.3 \ - docker-compose==1.29.2 \ - grpcio==1.60.0 \ - grpcio-tools==1.60.0 \ - kafka-python==2.0.2 \ - lz4==4.3.3 \ - minio==7.2.3 \ - nats-py==2.6.0 \ - protobuf==4.25.2 \ - kazoo==2.9.0 \ - psycopg2-binary==2.9.6 \ - pyhdfs==0.3.1 \ - pymongo==3.11.0 \ - pyspark==3.3.2 \ - pytest==7.4.4 \ - pytest-order==1.0.0 \ - pytest-random==0.2 \ - pytest-repeat==0.9.3 \ - pytest-timeout==2.2.0 \ - pytest-xdist==3.5.0 \ - pytest-reportlog==0.4.0 \ - pytz==2023.3.post1 \ - pyyaml==5.3.1 \ - redis==5.0.1 \ - requests-kerberos==0.14.0 \ - tzlocal==2.1 \ - retry==0.9.2 \ - bs4==0.0.2 \ - lxml==5.1.0 \ - urllib3==2.0.7 \ - jwcrypto==1.5.6 -# bs4, lxml are for cloud tests, do not delete +COPY requirements.txt / +RUN python3 -m pip install --no-cache-dir -r requirements.txt # Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt new file mode 100644 index 00000000000..8a77d8abf77 --- /dev/null +++ b/docker/test/integration/runner/requirements.txt @@ -0,0 +1,113 @@ +PyHDFS==0.3.1 +PyJWT==2.3.0 +PyMySQL==1.1.0 +PyNaCl==1.5.0 +PyYAML==5.3.1 +SecretStorage==3.3.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +async-timeout==4.0.3 +asyncio==3.4.3 +attrs==23.2.0 +avro==1.10.2 +azure-core==1.30.1 +azure-storage-blob==12.19.0 +bcrypt==4.1.3 +beautifulsoup4==4.12.3 +blinker==1.4 +boto3==1.34.24 +botocore==1.34.101 +bs4==0.0.2 +cassandra-driver==3.29.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +confluent-kafka==2.3.0 +cryptography==3.4.8 +dbus-python==1.2.18 +decorator==5.1.1 +delta-spark==2.3.0 +dict2xml==1.7.4 +dicttoxml==1.7.16 +distro-info==1.1+ubuntu0.2 +distro==1.7.0 +docker-compose==1.29.2 +docker==6.1.3 +dockerpty==0.4.1 +docopt==0.6.2 +exceptiongroup==1.2.1 +execnet==2.1.1 +geomet==0.2.1.post1 +grpcio-tools==1.60.0 +grpcio==1.60.0 +gssapi==1.8.3 +httplib2==0.20.2 +idna==3.7 +importlib-metadata==4.6.4 +iniconfig==2.0.0 +isodate==0.6.1 +jeepney==0.7.1 +jmespath==1.0.1 +jsonschema==3.2.0 +jwcrypto==1.5.6 +kafka-python==2.0.2 +kazoo==2.9.0 +keyring==23.5.0 +krb5==0.5.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.1.0 +lz4==4.3.3 +minio==7.2.3 +more-itertools==8.10.0 +nats-py==2.6.0 +oauthlib==3.2.0 +packaging==24.0 +paramiko==3.4.0 +pika==1.2.0 +pip==24.1.1 +pipdeptree==2.23.0 +pluggy==1.5.0 +protobuf==4.25.2 +psycopg2-binary==2.9.6 +py4j==0.10.9.5 +py==1.11.0 +pycparser==2.22 +pycryptodome==3.20.0 +pymongo==3.11.0 +pyparsing==2.4.7 +pyrsistent==0.20.0 +pyspark==3.3.2 +pyspnego==0.10.2 +pytest-order==1.0.0 +pytest-random==0.2 +pytest-repeat==0.9.3 +pytest-reportlog==0.4.0 +pytest-timeout==2.2.0 +pytest-xdist==3.5.0 +pytest==7.4.4 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +python-dotenv==0.21.1 +pytz==2023.3.post1 +redis==5.0.1 +requests-kerberos==0.14.0 +requests==2.31.0 +retry==0.9.2 +s3transfer==0.10.1 +setuptools==59.6.0 +simplejson==3.19.2 +six==1.16.0 +soupsieve==2.5 +texttable==1.7.0 +tomli==2.0.1 +typing_extensions==4.11.0 +tzlocal==2.1 +unattended-upgrades==0.1 +urllib3==2.0.7 +wadllib==1.3.6 +websocket-client==0.59.0 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index 
c9802a0e44e..e6eb2ae336e 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -1,3 +1,4 @@ +# docker build -t clickhouse/libfuzzer . ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG @@ -29,7 +30,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/libfuzzer/requirements.txt b/docker/test/libfuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/libfuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1835900b316..c68a39f6f70 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -23,7 +23,6 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ - python3-setuptools \ rsync \ tree \ tzdata \ @@ -33,12 +32,14 @@ RUN apt-get update \ cargo \ ripgrep \ zstd \ - && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +COPY requirements.txt / +RUN pip3 --no-cache-dir install -r requirements.txt + COPY run.sh / CMD ["bash", "/run.sh"] diff --git a/docker/test/performance-comparison/requirements.txt b/docker/test/performance-comparison/requirements.txt new file mode 100644 index 00000000000..932527cc022 --- /dev/null +++ b/docker/test/performance-comparison/requirements.txt @@ -0,0 +1,32 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +Pygments==2.11.2 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2023.4 +PyYAML==6.0.1 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==2.1 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1ea1e52e6fa..1425e12cd84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -18,11 +18,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - numpy \ - pyodbc \ - deepdiff \ - sqlglot +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" diff --git a/docker/test/sqllogic/requirements.txt b/docker/test/sqllogic/requirements.txt new file mode 100644 index 00000000000..abc0a368659 --- /dev/null +++ 
b/docker/test/sqllogic/requirements.txt @@ -0,0 +1,30 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +deepdiff==7.0.1 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.4 +oauthlib==3.2.0 +ordered-set==4.1.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyodbc==5.1.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +sqlglot==23.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 7f59f65761f..71d915b0c7a 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -14,9 +14,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - pyyaml \ - clickhouse-driver +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG sqltest_repo="https://github.com/elliotchance/sqltest/" diff --git a/docker/test/sqltest/requirements.txt b/docker/test/sqltest/requirements.txt new file mode 100644 index 00000000000..4a0ae3edbac --- /dev/null +++ b/docker/test/sqltest/requirements.txt @@ -0,0 +1,29 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2024.1 +PyYAML==6.0.1 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==5.2 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 355e70f180e..0daf88cad7e 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests \ nodejs \ npm \ && apt-get clean \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index c3d80a7334b..5a655a3fd2b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,10 +25,7 @@ RUN apt-get update -y \ openssl \ postgresql-client \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ qemu-user-static \ sqlite3 \ sudo \ @@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt new file mode 100644 index 00000000000..3284107e24e --- /dev/null +++ b/docker/test/stateless/requirements.txt @@ -0,0 +1,51 @@ +awscli==1.22.34 +blinker==1.4 +botocore==1.23.34 +certifi==2020.6.20 +chardet==4.0.0 +colorama==0.4.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +docutils==0.17.1 +gyp==0.1 +httplib2==0.20.2 
+idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.3 +jmespath==0.10.0 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyarrow==15.0.0 +pyasn1==0.4.8 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.8.1 +pytz==2024.1 +PyYAML==6.0.1 +requests==2.32.3 +roman==3.3 +rsa==4.8 +s3transfer==0.5.0 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7cd712b73f6..cdc1d1fa095 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install \ - PyGithub \ - black==23.12.0 \ - boto3 \ - codespell==2.2.1 \ - mypy==1.8.0 \ - pylint==3.1.0 \ - python-magic==0.4.24 \ - flake8==4.0.1 \ - requests \ - thefuzz \ - tqdm==4.66.4 \ - types-requests \ - unidiff \ - jwt \ - && rm -rf /root/.cache/pip +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt new file mode 100644 index 00000000000..bb0cd55dd1a --- /dev/null +++ b/docker/test/style/requirements.txt @@ -0,0 +1,58 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +astroid==3.1.0 +async-timeout==4.0.3 +attrs==23.2.0 +black==23.12.0 +boto3==1.34.131 +botocore==1.34.131 +certifi==2024.6.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +codespell==2.2.1 +cryptography==42.0.8 +Deprecated==1.2.14 +dill==0.3.8 +flake8==4.0.1 +frozenlist==1.4.1 +idna==3.7 +isort==5.13.2 +jmespath==1.0.1 +jwt==1.3.1 +mccabe==0.6.1 +multidict==6.0.5 +mypy==1.8.0 +mypy-extensions==1.0.0 +packaging==24.1 +pathspec==0.9.0 +pip==24.1.1 +pipdeptree==2.23.0 +platformdirs==4.2.2 +pycodestyle==2.8.0 +pycparser==2.22 +pyflakes==2.4.0 +PyGithub==2.3.0 +PyJWT==2.8.0 +pylint==3.1.0 +PyNaCl==1.5.0 +python-dateutil==2.9.0.post0 +python-magic==0.4.24 +PyYAML==6.0.1 +rapidfuzz==3.9.3 +requests==2.32.3 +s3transfer==0.10.1 +setuptools==59.6.0 +six==1.16.0 +thefuzz==0.22.1 +tomli==2.0.1 +tomlkit==0.12.5 +tqdm==4.66.4 +types-requests==2.32.0.20240622 +typing_extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +wheel==0.37.1 +wrapt==1.16.0 +yamllint==1.26.3 +yarl==1.9.4 From 1b6ef06a91cf31b2aa8dbe5e3494ec63d602e4c9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 21:14:28 +0100 Subject: [PATCH 325/439] review fixes --- src/Common/CgroupsMemoryUsageObserver.cpp | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 33393a8b9c6..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,3 @@ -#include -#include #include #if defined(OS_LINUX) @@ -14,7 +12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -59,9 +59,9 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke } assertChar(' ', buf); - 
uint64_t mem_usage = 0; - readIntText(mem_usage, buf); - return mem_usage; + uint64_t value = 0; + readIntText(value, buf); + return value; } throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); @@ -96,10 +96,12 @@ struct CgroupsV2Reader : ICgroupsReader current_buf.rewind(); stat_buf.rewind(); - uint64_t mem_usage = 0; + int64_t mem_usage = 0; /// memory.current contains a single number + /// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 readIntText(mem_usage, current_buf); mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + chassert(mem_usage >= 0, "Negative memory usage"); return mem_usage; } @@ -153,13 +155,13 @@ std::optional getCgroupsV1Path() std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2Path(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1Path(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); } From 5fb0fa3c3d3e611944f83ac06aaac2d6e5c0d0db Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Jul 2024 20:21:37 +0000 Subject: [PATCH 326/439] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 2840c830396..0eda165b1d2 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From afac188ae31784520e8f093736ca8fd9427b685b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Jul 2024 20:52:54 +0000 Subject: [PATCH 327/439] Automatic style fix --- docker/reqgenerator.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py index 89b901413d6..6c1d89ac0ac 100644 --- a/docker/reqgenerator.py +++ b/docker/reqgenerator.py @@ -6,29 +6,34 @@ import subprocess import os import sys + def build_docker_deps(image_name, imagedir): cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" subprocess.check_call(cmd, shell=True) + def check_docker_file_install_with_pip(filepath): image_name = None - with open(filepath, 'r') as f: + with open(filepath, "r") as f: for line in f: - if 'docker build' in line: - arr = line.split(' ') + if "docker build" in line: + arr = line.split(" ") if len(arr) > 4: image_name = arr[4] - if 'pip3 install' in line or 'pip install' in line: + if "pip3 install" in line or "pip install" in line: return 
image_name, True return image_name, False + def process_affected_images(images_dir): for root, _dirs, files in os.walk(images_dir): for f in files: if f == "Dockerfile": docker_file_path = os.path.join(root, f) print("Checking image on path", docker_file_path) - image_name, has_pip = check_docker_file_install_with_pip(docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip( + docker_file_path + ) if has_pip: print("Find pip in", image_name) try: From eb7ab5128d009fe89ef1994e2f32ea10bea900b1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 21:07:59 +0000 Subject: [PATCH 328/439] Clean-up custom LLVM 15 patches --- contrib/llvm-project | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/llvm-project b/contrib/llvm-project index d2142eed980..2a8967b60cb 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f +Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc From c4005d7e06ba4f775f34db01ff9fc7b61c1fefb0 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 23:13:50 +0200 Subject: [PATCH 329/439] Fix fasttest --- docker/test/fasttest/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index c80ea193010..c015d3a3542 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -84,6 +84,8 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" } +export -f start_server + function clone_root { [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" From 05e1e0f6013909838feec179ec3fdcc97b63e261 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Jul 2024 23:15:24 +0200 Subject: [PATCH 330/439] 03167_base64_url_functions_sh.sh add tag no-fasttest --- tests/queries/0_stateless/03167_base64_url_functions_sh.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh index ec3170b165c..f4d5addf370 100755 --- a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest # shellcheck disable=SC2155 set -e From fe42d2ffe49addd28716585a4e2f3b8a53a3d6b0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Jul 2024 08:10:40 +0100 Subject: [PATCH 331/439] Lower sampling rate --- src/Core/ServerSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index e70be61118a..6c62ab6def8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0.001, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0005, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp From e38e105e20ae6406a60baa0a08beed518676b346 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 16:44:20 +0800 Subject: [PATCH 332/439] add window function percent_rank --- .../sql-reference/window-functions/index.md | 1 + src/Processors/Transforms/WindowTransform.cpp | 171 ++++++++++++++++-- .../01592_window_functions.reference | 12 ++ .../0_stateless/01592_window_functions.sql | 18 ++ 4 files changed, 184 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 49076f3cbe1..8097abc0b15 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficent| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index b9f61d30182..0c7caca9de5 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -16,6 +16,10 @@ #include #include #include +#include "WindowTransform.h" + +#include +#include #include @@ -1609,8 +1613,37 @@ struct WindowFunctionHelpers { recurrent_detail::setValueToOutputColumn(transform, function_index, value); } + + ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; } + + ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) + { + /// when partition_ended is false, it means that we don't reach the last row in this partition. + /// But when partition_ended is true, it doesn't mean that we reach the last row in this partition. + /// partition_ended is true when + /// - the input has finished. or + /// - current block contains next partition's data. + /// This is for fast check. + if (!transform->partition_ended) + return false; + + auto current_row = transform->current_row; + current_row.row++; + const auto & partitoin_end_row = transform->partition_end; + /// If current_row == partitoin_end_row, return true. otherwise + if (current_row != partitoin_end_row) + { + if (current_row.row < transform->blockRowsNumber(current_row)) + return false; + /// Next row to current_row may belong to next block. + if (partitoin_end_row.block != current_row.block + 1 || partitoin_end_row.row) + return false; + } + return true; + } }; + template struct StatefulWindowFunction : public WindowFunction { @@ -1639,6 +1672,8 @@ struct StatefulWindowFunction : public WindowFunction { return *reinterpret_cast(workspace.aggregate_function_state.data()); } + + }; struct ExponentialTimeDecayedSumState @@ -2128,7 +2163,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2172,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row. 
- } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2211,6 +2230,115 @@ namespace } } +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::ROWS; + frame.end_type = WindowFrame::BoundaryType::Unbounded; + return frame; + } + + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override + { + checkFrameBoundType(transform); + + auto & state = getWorkspaceState(transform, function_index); + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) + { + state.current_partition_rows = 0; + state.start_row = transform->current_row; + } + + insertRankIntoColumn(transform, function_index); + state.current_partition_rows++; + + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) + { + return; + } + + UInt64 remaining_rows = state.current_partition_rows; + Float64 percent_rank_denominator = state.current_partition_rows - 1; + + if (remaining_rows <= 1) + return; + while(remaining_rows > 0) + { + auto block_rows_number = transform->blockRowsNumber(state.start_row); + auto available_block_rows = block_rows_number - state.start_row.row; + if (available_block_rows <= remaining_rows) + { + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row; i < block_rows_number; ++i) + data[i] = data[i] / percent_rank_denominator; + + state.start_row.block++; + state.start_row.row = 0; + remaining_rows -= available_block_rows; + } + else + { + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) + { + data[i] = data[i]/percent_rank_denominator; + } + state.start_row.row += remaining_rows; + remaining_rows = 0; + } + } + } + + + inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const + { + const auto & workspace = transform->workspaces[function_index]; + return getState(workspace); + } + + inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const + { + auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); + } +private: + mutable bool has_check_frame_bound_type = false; + ALWAYS_INLINE void checkFrameBoundType(const WindowTransform * transform) const + { + if (has_check_frame_bound_type) + return; + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING'"); + } + has_check_frame_bound_type = true; + } +}; + // ClickHouse-specific variant of lag/lead that respects the window frame. template struct WindowFunctionLagLeadInFrame final : public WindowFunction @@ -2582,6 +2710,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index ec957dd7a02..0995def71e2 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -79,3 +79,15 @@ iPhone 900 Smartphone 500 500 Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 +---- Q8 ---- +Lenovo Thinkpad Laptop 700 0 +Sony VAIO Laptop 700 0.3333333333333333 +Dell Vostro Laptop 800 0.6666666666666666 +HP Elite Laptop 1200 1 +Microsoft Lumia Smartphone 200 0 +HTC One Smartphone 400 0.3333333333333333 +Nexus Smartphone 500 0.6666666666666666 +iPhone Smartphone 900 1 +Kindle Fire Tablet 150 0 +Samsung Galaxy Tab Tablet 200 0.5 +iPad Tablet 700 1 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index f0d173b1f20..e48e26b26d2 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -101,5 +101,23 @@ SELECT FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; +select '---- Q8 ----'; +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +ORDER BY + group_name ASC, + r ASC, + product_name ASC, + price ASC; + drop table product_groups; drop table products; From 6e231eedcf04e4136ea56fcbbb4a43916241dd23 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 17:16:20 +0800 Subject: [PATCH 333/439] fixed style --- src/Processors/Transforms/WindowTransform.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 0c7caca9de5..4758d5ca7f4 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -16,10 +16,6 @@ #include #include #include -#include "WindowTransform.h" - -#include -#include #include @@ -1643,7 +1639,6 @@ struct WindowFunctionHelpers } }; - template struct StatefulWindowFunction : public WindowFunction { @@ -1672,8 +1667,6 @@ struct StatefulWindowFunction : public WindowFunction { return *reinterpret_cast(workspace.aggregate_function_state.data()); } - - }; struct ExponentialTimeDecayedSumState @@ -2281,7 +2274,7 @@ public: if (remaining_rows <= 1) return; - while(remaining_rows > 0) + while (remaining_rows > 0) { auto block_rows_number = transform->blockRowsNumber(state.start_row); auto 
available_block_rows = block_rows_number - state.start_row.row; From 37d2ced74cd173c44015a89fcb9522ef9c3979ee Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 17:35:43 +0800 Subject: [PATCH 334/439] fixed typos --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 8097abc0b15..814a7ac4aca 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,7 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficent| +| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficient| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | From 13d5b336adb8021742f842bff457ee8b3267b743 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 09:16:31 +0800 Subject: [PATCH 335/439] check window frame --- src/Processors/Transforms/WindowTransform.cpp | 91 +++++++++++-------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 4758d5ca7f4..45c9f4457b8 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include @@ -71,6 +74,9 @@ public: size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } + + /// Is the frame type supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_, } } } + + for (const auto & workspace : workspaces) + { + if (workspace.window_function_impl) + { + if (!workspace.window_function_impl->checkWindowFrameType(this)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'", + workspace.aggregate_function->getName()); + } + } + + } } WindowTransform::~WindowTransform() @@ -2086,8 +2105,6 @@ namespace const WindowTransform * transform, size_t function_index, const DataTypes & argument_types); - - static void checkWindowFrameType(const WindowTransform * transform); }; } @@ -2107,6 +2124,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction } bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->order_by_indices.empty()) + { + LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause"); + return false; + } + + // We must wait all for the partition end and get the total rows number in this + // partition. So before the end of this partition, there is no any block could be + // dropped out. + bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } std::optional getDefaultFrame() const override { @@ -2134,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2205,22 +2244,6 @@ namespace bucket_num += 1; } } - - void NtileState::checkWindowFrameType(const WindowTransform * transform) - { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); - - // We must wait all for the partition end and get the total rows number in this - // partition. 
So before the end of this partition, there is no any block could be - // dropped out. - bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded - && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; - if (!is_frame_supported) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); - } - } } namespace @@ -2249,11 +2272,22 @@ public: frame.end_type = WindowFrame::BoundaryType::Unbounded; return frame; } + + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + { + LOG_ERROR(getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override { - checkFrameBoundType(transform); - auto & state = getWorkspaceState(transform, function_index); if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) { @@ -2315,21 +2349,6 @@ public: auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); } -private: - mutable bool has_check_frame_bound_type = false; - ALWAYS_INLINE void checkFrameBoundType(const WindowTransform * transform) const - { - if (has_check_frame_bound_type) - return; - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded - || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); - } - has_check_frame_bound_type = true; - } }; // ClickHouse-specific variant of lag/lead that respects the window frame. 
From 04e7b11a6477ed8b554a12ca301d00ba01e0525d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 10:33:44 +0800 Subject: [PATCH 336/439] fixed style --- src/Processors/Transforms/WindowTransform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 45c9f4457b8..1cb447bb6d3 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2124,7 +2124,7 @@ struct WindowFunctionNtile final : public StatefulWindowFunction } bool allocatesMemoryInArena() const override { return false; } - + bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->order_by_indices.empty()) @@ -2272,7 +2272,7 @@ public: frame.end_type = WindowFrame::BoundaryType::Unbounded; return frame; } - + bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded From 7f706dd9d1e7bf0b982c9db86f73c1cc89a4a0a5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:40:58 +0800 Subject: [PATCH 337/439] fixed --- src/Processors/Transforms/WindowTransform.cpp | 11 +--------- .../01592_window_functions.reference | 22 +++++++++---------- .../0_stateless/01592_window_functions.sql | 8 +++---- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1cb447bb6d3..ce188ed47ae 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2265,18 +2265,9 @@ public: bool allocatesMemoryInArena() const override { return false; } - std::optional getDefaultFrame() const override - { - WindowFrame frame; - frame.type = WindowFrame::FrameType::ROWS; - frame.end_type = WindowFrame::BoundaryType::Unbounded; - return frame; - } - bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded - || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) { LOG_ERROR(getLogger("WindowFunctionPercentRank"), "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index 0995def71e2..f88360abcc1 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -80,14 +80,14 @@ Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 ---- Q8 ---- -Lenovo Thinkpad Laptop 700 0 -Sony VAIO Laptop 700 0.3333333333333333 -Dell Vostro Laptop 800 0.6666666666666666 -HP Elite Laptop 1200 1 -Microsoft Lumia Smartphone 200 0 -HTC One Smartphone 400 0.3333333333333333 -Nexus Smartphone 500 0.6666666666666666 -iPhone Smartphone 900 1 -Kindle Fire Tablet 150 0 -Samsung Galaxy Tab Tablet 200 0.5 -iPad Tablet 700 1 +Lenovo Thinkpad Laptop 700 1 0 +Sony VAIO Laptop 700 1 0 +Dell Vostro Laptop 800 3 0.6666666666666666 +HP Elite Laptop 1200 4 1 +Microsoft Lumia Smartphone 200 1 0 +HTC One Smartphone 400 2 0.3333333333333333 +Nexus Smartphone 500 3 
0.6666666666666666 +iPhone Smartphone 900 4 1 +Kindle Fire Tablet 150 1 0 +Samsung Galaxy Tab Tablet 200 2 0.5 +iPad Tablet 700 3 1 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index e48e26b26d2..f4b868c36e4 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -109,15 +109,15 @@ FROM product_name, group_name, price, - percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent FROM products INNER JOIN product_groups USING (group_id) ) AS t ORDER BY group_name ASC, - r ASC, - product_name ASC, - price ASC; + price ASC, + product_name ASC; drop table product_groups; drop table products; From 656a9a7260e3789b8fb671b788dbb3126d88ebe9 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:42:24 +0800 Subject: [PATCH 338/439] update --- tests/queries/0_stateless/01592_window_functions.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index f4b868c36e4..2fc0e55bf02 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -109,14 +109,14 @@ FROM product_name, group_name, price, - rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent FROM products INNER JOIN product_groups USING (group_id) ) AS t ORDER BY group_name ASC, - price ASC, + price ASC, product_name ASC; drop table product_groups; From 91d2e5c72b38bb607d5a75020d38230a6937f310 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:48:42 +0800 Subject: [PATCH 339/439] more corver case --- tests/queries/0_stateless/01592_window_functions.reference | 1 + tests/queries/0_stateless/01592_window_functions.sql | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index f88360abcc1..06ec67ee82d 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -91,3 +91,4 @@ iPhone Smartphone 900 4 1 Kindle Fire Tablet 150 1 0 Samsung Galaxy Tab Tablet 200 2 0.5 iPad Tablet 700 3 1 +Others Unknow 200 1 0 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index 2fc0e55bf02..a660fcca7b2 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -102,6 +102,9 @@ FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; select '---- Q8 ----'; +INSERT INTO product_groups VALUES (4, 'Unknow'); +INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200); + SELECT * FROM ( From e52828abf91c2e407fbdf5371e5c794a31b86b1e Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 13 May 2024 11:19:33 +0800 Subject: [PATCH 340/439] fixed typos --- src/Processors/Transforms/WindowTransform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index ce188ed47ae..ad592613da2 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1644,14 +1644,14 @@ struct WindowFunctionHelpers auto current_row = transform->current_row; current_row.row++; - const auto & partitoin_end_row = transform->partition_end; + const auto & partition_end_row = transform->partition_end; /// If current_row == partitoin_end_row, return true. otherwise - if (current_row != partitoin_end_row) + if (current_row != partition_end_row) { if (current_row.row < transform->blockRowsNumber(current_row)) return false; /// Next row to current_row may belong to next block. - if (partitoin_end_row.block != current_row.block + 1 || partitoin_end_row.row) + if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) return false; } return true; From fa234cadcbd287145ce93211dd239fdf034d9335 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 14 May 2024 11:47:50 +0800 Subject: [PATCH 341/439] update doc --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 814a7ac4aca..16225d4b0e2 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,7 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficient| +| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | From 297b65dbbe1859bac7c237d644452b03e3e5849d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 15 May 2024 17:16:44 +0800 Subject: [PATCH 342/439] fixed --- src/Processors/Transforms/WindowTransform.cpp | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index ad592613da2..517e202556b 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2267,15 +2267,25 @@ public: bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) - { - LOG_ERROR(getLogger("WindowFunctionPercentRank"), - "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); - return false; + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); + return false; } return true; } + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::RANGE; + frame.begin_type = WindowFrame::BoundaryType::Unbounded; + frame.end_type = WindowFrame::BoundaryType::Current; + return frame; + } void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override { From b6782d4b2d98e72002b691a3a421d689831fc1bf Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 24 May 2024 09:39:20 +0800 Subject: [PATCH 343/439] update --- src/Processors/Transforms/WindowTransform.cpp | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 517e202556b..729fef5c05d 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1633,24 +1633,24 @@ struct WindowFunctionHelpers ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) { - /// when partition_ended is false, it means that we don't reach the last row in this partition. - /// But when partition_ended is true, it doesn't mean that we reach the last row in this partition. - /// partition_ended is true when - /// - the input has finished. or - /// - current block contains next partition's data. /// This is for fast check. if (!transform->partition_ended) return false; auto current_row = transform->current_row; + /// checkPartitionEnterLastRow is called on each row, also move on current_row.row here. current_row.row++; const auto & partition_end_row = transform->partition_end; - /// If current_row == partitoin_end_row, return true. otherwise + + /// The partition end is reached, when following is true + /// - current row is the partition end row, + /// - or current row is the last row of all input. if (current_row != partition_end_row) { + /// when current row is not the partition end row, we need to check whether it's the last + /// input row. if (current_row.row < transform->blockRowsNumber(current_row)) return false; - /// Next row to current_row may belong to next block. 
if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) return false; } @@ -2268,7 +2268,8 @@ public: bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE - || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) { LOG_ERROR( getLogger("WindowFunctionPercentRank"), @@ -2305,20 +2306,20 @@ public: } UInt64 remaining_rows = state.current_partition_rows; - Float64 percent_rank_denominator = state.current_partition_rows - 1; + Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1; - if (remaining_rows <= 1) - return; while (remaining_rows > 0) { auto block_rows_number = transform->blockRowsNumber(state.start_row); auto available_block_rows = block_rows_number - state.start_row.row; if (available_block_rows <= remaining_rows) { + /// This partition involves multiple blocks. Finish current block and move on to the + /// next block. auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; auto & data = assert_cast(to_column).getData(); for (size_t i = state.start_row.row; i < block_rows_number; ++i) - data[i] = data[i] / percent_rank_denominator; + data[i] = (data[i] - 1) / percent_rank_denominator; state.start_row.block++; state.start_row.row = 0; @@ -2326,11 +2327,12 @@ public: } else { + /// The partition ends in current block.s auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; auto & data = assert_cast(to_column).getData(); for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) { - data[i] = data[i]/percent_rank_denominator; + data[i] = (data[i] - 1) / percent_rank_denominator; } state.start_row.row += remaining_rows; remaining_rows = 0; From 87978327d6bab9509c0aef945dfbf76b4437e300 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 27 May 2024 09:00:17 +0800 Subject: [PATCH 344/439] fixed --- src/Processors/Transforms/WindowTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 729fef5c05d..a694fa43e46 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2350,7 +2350,7 @@ public: inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const { auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; - assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number)); } }; From 1c82488eca1f3e4a12c66cc8b4779fd441cb5102 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Jul 2024 10:59:00 +0200 Subject: [PATCH 345/439] 03167_base64_url_functions_sh.sh make test simpler --- .../03167_base64_url_functions_sh.sh | 47 +++++-------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh index f4d5addf370..57060b8c525 100755 --- a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh +++ 
b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -132,51 +132,30 @@ base64URLDecode() { echo "$result" | tr '_-' '/+' | base64 -w0 -d } -test_compare_to_gold_encode() { +test() { local input="$1" - local encode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") local encode_gold=$(base64URLEncode $input) - if [ "$encode" != "$encode_gold" ]; then - echo "Input: $input" - echo "Expected: $encode_gold" - echo "Got: $encode" - fi -} - -test_compare_to_gold_decode() { - local input="$1" - local encode_gold=$(base64URLEncode $input) - local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") local decode_gold=$(base64URLDecode $encode_gold) - if [ "$decode" != "$decode_gold" ]; then + if [ "$encode_ch" != "$encode_gold" ]; then echo "Input: $input" - echo "Expected: $decode_gold" - echo "Got: $decode" + echo "Expected: $encode_gold" + echo "Got: $encode_ch" + fi + + if [ "$decode_ch" != "$input" ] || [ "$decode_ch" != "$decode_gold" ]; then + echo "Input: $input" + echo "Decode gold: $decode_gold" + echo "Got: $decode_ch" fi } -test_compare_to_self() { - local input="$1" - local decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('$input'))") - - if [ "$decode" != "$input" ]; then - echo "Input: $input" - echo "Got: $decode" - fi -} for url in "${urls[@]}"; do - test_compare_to_gold_encode "$url" -done - -for url in "${urls[@]}"; do - test_compare_to_gold_decode "$url" -done - -for url in "${urls[@]}"; do - test_compare_to_self "$url" + test "$url" done # special case for ' From bfc755c000cb016f88b51a9526fbb32b375ccbe4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jul 2024 11:44:24 +0200 Subject: [PATCH 346/439] Fix shutdown --- .../ObjectStorageQueueSource.cpp | 55 +++++++++---------- .../ObjectStorageQueueSource.h | 1 - 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index dc5fb6d2744..f43796fc8b3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -424,14 +424,14 @@ Chunk ObjectStorageQueueSource::generateImpl() { while (true) { - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); - break; - } - if (!reader) { + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + break; + } + const auto context = getContext(); reader = StorageObjectStorageSource::createReader( processor_id, file_iterator, configuration, object_storage, read_from_format_info, @@ -448,28 +448,6 @@ Chunk ObjectStorageQueueSource::generateImpl() const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); - - if (isCancelled()) - { - reader->cancel(); - - if (processed_rows_from_file) - { - try - { - file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); - } - catch (...) 
- { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - } - - LOG_TEST(log, "Query is cancelled"); - break; - } - const auto & path = reader.getObjectInfo()->getPath(); if (shutdown_called) @@ -504,6 +482,27 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } + if (isCancelled()) + { + reader->cancel(); + + if (processed_rows_from_file) + { + try + { + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) + { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + } + + LOG_TEST(log, "Query is cancelled"); + break; + } + auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index fce2a426ecb..0f3d0ab2e92 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -21,7 +21,6 @@ class ObjectStorageQueueSource : public ISource, WithContext public: using Storage = StorageObjectStorage; using Source = StorageObjectStorageSource; - using RemoveFileFunc = std::function; using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; From 7ae85fda3f0279ceb16a45ee8babdbb0cd40f57d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jul 2024 11:46:17 +0200 Subject: [PATCH 347/439] Restore previous order --- .../ObjectStorageQueueSource.cpp | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 4f6f6a0e97a..4d921003e04 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -452,6 +452,27 @@ Chunk ObjectStorageQueueSource::generateImpl() auto file_status = file_metadata->getFileStatus(); const auto & path = reader.getObjectInfo()->getPath(); + if (isCancelled()) + { + reader->cancel(); + + if (processed_rows_from_file) + { + try + { + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) + { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + } + + LOG_TEST(log, "Query is cancelled"); + break; + } + if (shutdown_called) { LOG_TEST(log, "Shutdown called"); @@ -484,27 +505,6 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } - if (isCancelled()) - { - reader->cancel(); - - if (processed_rows_from_file) - { - try - { - file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); - } - catch (...) 
- { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - } - - LOG_TEST(log, "Query is cancelled"); - break; - } - try { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); From 75828c6e817e0c2a2c68040a63a46a083fc56e7a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 12:16:00 +0200 Subject: [PATCH 348/439] Try to disable sccache --- tests/ci/build_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 39f34ed9ccf..ac3ff9a0b5a 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -62,7 +62,7 @@ def get_packager_cmd( if build_config.tidy: cmd += " --clang-tidy" - cmd += " --cache=sccache" + cmd += " --cache=ccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" From 33b7afc1b45e0a493f4139dae14025e4e3613c29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 09:02:33 +0000 Subject: [PATCH 349/439] Bump vectorscan to 5.4.11 --- contrib/icu-cmake/CMakeLists.txt | 2 +- contrib/vectorscan | 2 +- contrib/vectorscan-cmake/CMakeLists.txt | 27 +++++--------------- contrib/vectorscan-cmake/common/hs_version.h | 6 ++++- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/vectorscan b/contrib/vectorscan index 4918f81ea3d..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 +Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. 
if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. 
*/ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ From 4543ae3d6490a660c4df8b15bde0345735a540e0 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:51:22 +0200 Subject: [PATCH 350/439] Update test --- tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql index 95823283812..a01a595dbb5 100644 --- a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -3,5 +3,5 @@ create table test (d Dynamic) engine=Memory; insert into table test select * from numbers(5); alter table test modify column d Dynamic(max_types=1); select d.UInt64 from test settings allow_experimental_analyzer=1; -select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=0; From 6ea4c101214edf678743833856e09f717f672c67 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 27 Jun 2024 17:19:52 +0200 Subject: [PATCH 351/439] Done --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. 
It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ From 6f89c4b9328b587d2342ca172da4eaebe0611a1c Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 28 Jun 2024 21:09:36 +0000 Subject: [PATCH 352/439] Bump the minimal version to keep compatibility --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file From d62454714b1e8ea760c6d6baa8df4ec185b8750c Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 1 Jul 2024 17:52:35 +0200 Subject: [PATCH 353/439] Make test error visible --- tests/ci/integration_tests_runner.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 87f721cfde7..7802dfa3c52 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner: "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + try: + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + except subprocess.CalledProcessError as ex: + print("ERROR: Setting test plan failed. Output:") + with open(out_file_full, 'r') as file: + for line in file: + print(" " + line, end='') + raise ex all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: From e2553454ecbe890cecf90995565d805287ca0703 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 1 Jul 2024 16:02:06 +0000 Subject: [PATCH 354/439] Automatic style fix --- tests/ci/integration_tests_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 7802dfa3c52..a1c33cf22d9 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,9 +438,9 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. 
Output:") - with open(out_file_full, 'r') as file: + with open(out_file_full, "r") as file: for line in file: - print(" " + line, end='') + print(" " + line, end="") raise ex all_tests = set() From 782115efea35a60cd9627faf22c1684ad54a551d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 18:06:47 +0200 Subject: [PATCH 355/439] Very bad change --- tests/integration/test_distributed_inter_server_secret/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..3e656c9d776 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -28,7 +28,7 @@ def make_instance(name, *args, **kwargs): # DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" +assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user From 2c37cc048c16a90d972c0f1c2b9c41727174cff3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 22:11:44 +0200 Subject: [PATCH 356/439] Style --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index a1c33cf22d9..21f16d995a4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,7 +438,7 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. 
Output:") - with open(out_file_full, "r") as file: + with open(out_file_full, "r", encoding="utf-8") as file: for line in file: print(" " + line, end="") raise ex From 6fcd0eed06af3a02d1e26f182eb7f2bbf16d6471 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 11:40:04 +0000 Subject: [PATCH 357/439] Remove tests which are no longer relevant --- .../test.py | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 3e656c9d776..457590ac851 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -26,10 +26,6 @@ def make_instance(name, *args, **kwargs): **kwargs, ) - -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +34,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +415,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." 
- ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From 910065e42745e3a1299a596462d6197e4c36a50f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 11:48:28 +0000 Subject: [PATCH 358/439] Automatic style fix --- tests/integration/test_distributed_inter_server_secret/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 457590ac851..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -26,6 +26,7 @@ def make_instance(name, *args, **kwargs): **kwargs, ) + # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( From dd3eb538f6a0788365ff62e15ab167b28f3d76a1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 22:51:21 +0000 Subject: [PATCH 359/439] Better --- src/Client/HedgedConnections.cpp | 6 ++ src/Client/MultiplexedConnections.cpp | 6 ++ src/Server/TCPHandler.cpp | 7 ++ .../test_analyzer_compatibility/__init__.py | 0 .../configs/remote_servers.xml | 17 ++++ .../test_analyzer_compatibility/test.py | 79 +++++++++++++++++++ 6 files changed, 115 insertions(+) create mode 100644 tests/integration/test_analyzer_compatibility/__init__.py create mode 100644 tests/integration/test_analyzer_compatibility/configs/remote_servers.xml create mode 100644 tests/integration/test_analyzer_compatibility/test.py diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..51cbe6f3d6f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -195,6 +195,12 @@ void HedgedConnections::sendQuery( modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..99bdd706d8b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery( } } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. 
+ modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a522a3f8782..cfb41be0c27 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1890,6 +1890,13 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer will become obsolete. + /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there + /// So we just force ourselves to act in the same way. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) diff --git a/tests/integration/test_analyzer_compatibility/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..0a50dab7fd3 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + current + 9000 + + + backward + 9000 + + + + + diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py new file mode 100644 index 00000000000..0ba7f248606 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -0,0 +1,79 @@ +import uuid + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2" + +cluster = ClickHouseCluster(__file__) +# Here analyzer is enabled by default +current = cluster.add_instance( + "current", + main_configs=["configs/remote_servers.xml"], +) +# Here analyzer is disabled by default +backward = cluster.add_instance( + "backward", + use_old_analyzer=True, + main_configs=["configs/remote_servers.xml"], + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT, + with_installed_binary=True, +) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_two_new_versions(start_cluster): + # Two new versions (both know about the analyzer) + # One have it enabled by default, another one - disabled. 
+ + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert current.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) + + # Should be enabled everywhere + analyzer_enabled = current.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert backward.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) + + # Should be disabled everywhere + analyzer_enabled = backward.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("0") From 7bd283764cc7c9350c376c3dc4f4b5c1bba0deee Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 22:59:22 +0000 Subject: [PATCH 360/439] Automatic style fix --- .../test_analyzer_compatibility/test.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py index 0ba7f248606..d4ded420c61 100644 --- a/tests/integration/test_analyzer_compatibility/test.py +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -22,6 +22,7 @@ backward = cluster.add_instance( with_installed_binary=True, ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -39,41 +40,61 @@ def test_two_new_versions(start_cluster): backward.query("SYSTEM FLUSH LOGS") query_id = str(uuid.uuid4()) - current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert current.query(""" + assert ( + current.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) # Should be enabled everywhere - analyzer_enabled = current.query(f""" + analyzer_enabled = current.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("1") query_id = str(uuid.uuid4()) - backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + backward.query( + "SELECT * FROM 
clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert backward.query(""" + assert ( + backward.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) # Should be disabled everywhere - analyzer_enabled = backward.query(f""" + analyzer_enabled = backward.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("0") From d57375181d7de82f28ce5fcd40580cf04c2411b8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 15:49:52 +0000 Subject: [PATCH 361/439] Better --- src/Core/Settings.h | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 ++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9da0b297e9e..f6d282792db 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 34f5c28fef8..544b06cca1b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. 
# This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 7ecb2cda257..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) @@ -27,6 +27,9 @@ def make_instance(name, *args, **kwargs): ) +# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits +assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" + # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -35,6 +38,14 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) +backward = make_instance( + "backward", + main_configs=["configs/remote_servers_backward.xml"], + image="clickhouse/clickhouse-server", + # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, + with_installed_binary=True, +) users = pytest.mark.parametrize( "user,password", @@ -416,6 +427,28 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) +@users +def test_user_secure_cluster_with_backward(user, password): + id_ = "with-backward-query-dist_secure-" + user + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password + ) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + +@users +def test_user_secure_cluster_from_backward(user, password): + id_ = "from-backward-query-dist_secure-" + user + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + assert n1.contains_in_log( + "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." + ) + + def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From fcabefa8f3e5a86aad6f5c2b79ef8eabbc349b9d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 16:42:01 +0000 Subject: [PATCH 362/439] Automatically disabling --- src/Server/TCPHandler.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index cfb41be0c27..443cc99475f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1892,10 +1892,13 @@ void TCPHandler::receiveQuery() /// /// FIXME: Remove when allow_experimental_analyzer will become obsolete. - /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there - /// So we just force ourselves to act in the same way. 
- if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; From fe6a875c7473d814011f4ae202942232e0801427 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 21:51:40 +0000 Subject: [PATCH 363/439] Make the setting back IMPORTANT + fix build --- src/Core/Settings.h | 2 +- src/Server/TCPHandler.cpp | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 +------------------ 4 files changed, 4 insertions(+), 37 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 443cc99475f..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1875,7 +1875,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. 
if (is_interserver_mode && !default_database.empty()) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." 
- ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From c93d8cbb66ad7ae5a1e9c4f46f0c351944ff04e9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Jul 2024 13:57:47 +0200 Subject: [PATCH 364/439] Fixes --- contrib/jemalloc-cmake/CMakeLists.txt | 6 +++++- programs/keeper/Keeper.cpp | 5 +++++ programs/server/Server.cpp | 5 +++++ src/Common/Jemalloc.cpp | 26 +++++++++++++++++++++++++- src/Common/Jemalloc.h | 4 ++++ 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index cc5a391676f..38ebcc8f680 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,11 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + else() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + endif() else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..fc3778593a6 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -277,6 +279,9 @@ HTTPContextPtr httpContext() int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..1277249b462 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -656,6 +657,10 @@ static void initializeAzureSDKLogger( int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? 
"enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif From 036d0c3fad88e7092af2c2e749ce7132db811d26 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 14:08:08 +0200 Subject: [PATCH 365/439] Revert "Try to disable sccache" This reverts commit 75828c6e817e0c2a2c68040a63a46a083fc56e7a. --- tests/ci/build_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index ac3ff9a0b5a..39f34ed9ccf 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -62,7 +62,7 @@ def get_packager_cmd( if build_config.tidy: cmd += " --clang-tidy" - cmd += " --cache=ccache" + cmd += " --cache=sccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" From 8040150de8d0a27ffa72cfa4eaf7563d419dca71 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 14:22:30 +0200 Subject: [PATCH 366/439] Very dirty hack --- docker/test/fasttest/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 5f92db5a3ee..d6cc17c4126 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -36,6 +36,7 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +RUN cp -r /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From da5f3c1efd940488413b34d3e5f8855460f0ce80 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:01:57 +0000 Subject: [PATCH 367/439] Fix test --- .../test_functions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..e5023c062ff 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. 
"rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,16 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +218,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. + "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) From dbfe6323821739af4c8856e799cb5d861377439d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:11:13 +0000 Subject: [PATCH 368/439] Correct the test to exclude farmHash for now --- tests/integration/test_backward_compatibility/test_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index e5023c062ff..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -166,6 +166,8 @@ def test_string_functions(start_cluster): "filesystemUnreserved", "filesystemCapacity", "filesystemAvailable", + # Exclude it for now. Looks like the result depends on the build type. 
+ "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) From 2c77371b8b6a5b1d397fadea17fe0ce97c2af106 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:27:23 +0200 Subject: [PATCH 369/439] Better --- docker/test/fasttest/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index d6cc17c4126..fa58261ee97 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -36,7 +36,8 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake -RUN cp -r /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From 8d6c0147e258fd9bd939c6a52b51a422c43aaa15 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:30:27 +0200 Subject: [PATCH 370/439] Add comment --- docker/test/fasttest/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index fa58261ee97..a818e01ccca 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -36,6 +36,8 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. +# It's very dirty workaround, better to build compiler and LLVM ourself and use it. 
RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: From 32a61e6088a8d5b18d7f217c45c27f1268db2501 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:38:44 +0200 Subject: [PATCH 371/439] Bump From b4be9d5c6f64cb108e961180aa90453ac8efd5ff Mon Sep 17 00:00:00 2001 From: divanik Date: Thu, 4 Jul 2024 13:40:36 +0000 Subject: [PATCH 372/439] Fix address --- programs/disks/DisksApp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 0898b692095..59ba45b9451 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -514,6 +514,7 @@ int DisksApp::main(const std::vector & /*args*/) DisksApp::~DisksApp() { + client.reset(nullptr); if (global_context) global_context->shutdown(); } From 1af3caf2b7eca87186de0aa73502f941ea913e14 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:48:12 +0200 Subject: [PATCH 373/439] Update docker/test/fasttest/Dockerfile Co-authored-by: Azat Khuzhin --- docker/test/fasttest/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a818e01ccca..46490276003 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -38,8 +38,8 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake # LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. # It's very dirty workaround, better to build compiler and LLVM ourself and use it. 
-RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From 1e5bc5bc8c7e65908e7127e06e1f5008f6fbfa20 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Jul 2024 15:48:12 +0200 Subject: [PATCH 374/439] fix flaky test --- .../0_stateless/03172_error_log_table_not_empty.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 8d74ebe1039..4b83400f5de 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -4,17 +4,19 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# system.error_log is created lazy, flush logs query makes it sure that the table is created. +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;" + # Get the previous number of errors for 111, 222 and 333 errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") -# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed +# Throw three random errors: 111, 222 and 333 and call flush logs to ensure system.error_log is flushed $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } -SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " @@ -30,7 +32,6 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } -SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " @@ -38,4 +39,4 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; -" \ No newline at end of file +" From 
34697c0bfa85df5b0f07d6457f36e1b1eec4e680 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Jul 2024 15:55:03 +0200 Subject: [PATCH 375/439] Tests: increase timeout for flaky check --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..ef61d493f4b 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -69,7 +69,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -TEST_MAX_RUN_TIME_IN_SECONDS = 120 +TEST_MAX_RUN_TIME_IN_SECONDS = 180 class SharedEngineReplacer: From 963f39ede89ecdfafd0731d0c5fed27dbad7bc11 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:56:44 +0200 Subject: [PATCH 376/439] Revert "Update docker/test/fasttest/Dockerfile" This reverts commit 1af3caf2b7eca87186de0aa73502f941ea913e14. --- docker/test/fasttest/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 46490276003..a818e01ccca 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -38,8 +38,8 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake # LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. # It's very dirty workaround, better to build compiler and LLVM ourself and use it. -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From 11a30d6d6026dca61247590ce337e8e766601c5a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 14:14:19 +0000 Subject: [PATCH 377/439] Bump s2geometry to latest master --- contrib/s2geometry | 2 +- contrib/s2geometry-cmake/CMakeLists.txt | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/contrib/s2geometry b/contrib/s2geometry index 0547c383717..0146e2d1355 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b +Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 6632f9c27d5..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) +option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) if (NOT ENABLE_S2_GEOMETRY) - 
message(STATUS "Not using S2 geometry") + message(STATUS "Not using S2 Geometry") return() endif() @@ -38,6 +38,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2cell_index.cc" "${S2_SOURCE_DIR}/s2/s2cell_union.cc" "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" @@ -46,6 +47,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2coords.cc" "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2density_tree.cc" "${S2_SOURCE_DIR}/s2/s2earth.cc" "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" @@ -53,8 +55,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2fractal.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc" + "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -63,10 +67,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" - "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" "${S2_SOURCE_DIR}/s2/s2point_compression.cc" @@ -80,10 +84,11 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2predicates.cc" "${S2_SOURCE_DIR}/s2/s2projections.cc" "${S2_SOURCE_DIR}/s2/s2r2rect.cc" - "${S2_SOURCE_DIR}/s2/s2region.cc" - "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2random.cc" "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_sharder.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" "${S2_SOURCE_DIR}/s2/s2region_union.cc" "${S2_SOURCE_DIR}/s2/s2shape_index.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" @@ -94,9 +99,12 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2testing.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" @@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE absl::strings absl::type_traits absl::utility + absl::vlog_is_on ) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") From f8ea14da5d46dc09d1e8c2df62e6b03258416628 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 02:23:43 +0000 Subject: [PATCH 378/439] fix function 'has' with LowCardinality and FixedString --- src/Functions/array/arrayIndex.h | 445 +++++++----------- .../03199_has_lc_fixed_string.reference | 2 + .../0_stateless/03199_has_lc_fixed_string.sql | 7 + 3 files changed, 181 insertions(+), 273 deletions(-) create mode 100644 
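Before diving into the diff, a hedged sketch of the case this commit fixes: probing an `Array(LowCardinality(FixedString(8)))` column with a needle of a different FixedString width. It is essentially the scenario exercised by the new `03199_has_lc_fixed_string` regression test added below (the table name here is only illustrative):

``` sql
CREATE TABLE lc_fs_demo (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory;
INSERT INTO lc_fs_demo VALUES (['a', 'b']), (['c', 'd']);

-- The FixedString(1) needle is cast to the array's nested type before the lookup.
SELECT has(arr, toFixedString('a', 1)) FROM lc_fs_demo;     -- expected: 1, then 0

DROP TABLE lc_fs_demo;
```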
tests/queries/0_stateless/03199_has_lc_fixed_string.reference create mode 100644 tests/queries/0_stateless/03199_has_lc_fixed_string.sql diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fa9b3dc92dd..067957f0d4b 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -28,6 +28,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } using NullMap = PaddedPODArray; @@ -424,31 +425,21 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - if constexpr (std::is_same_v) + if (auto res = executeMap(arguments, result_type)) + return res; + + if (auto res = executeArrayLowCardinality(arguments)) + return res; + + auto new_arguments = arguments; + + for (auto & argument : new_arguments) { - if (isMap(arguments[0].type)) - { - auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); - - const auto & map_column = assert_cast(*non_const_map_column); - const auto & map_array_column = map_column.getNestedColumn(); - auto offsets = map_array_column.getOffsetsPtr(); - auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(keys, offsets); - - const auto & type_map = assert_cast(*arguments[0].type); - auto array_type = std::make_shared(type_map.getKeyType()); - - auto arguments_copy = arguments; - arguments_copy[0].column = std::move(array_column); - arguments_copy[0].type = std::move(array_type); - arguments_copy[0].name = arguments[0].name; - - return executeArrayImpl(arguments_copy, result_type); - } + argument.column = recursiveRemoveLowCardinality(argument.column); + argument.type = recursiveRemoveLowCardinality(argument.type); } - return executeArrayImpl(arguments, result_type); + return executeArrayImpl(new_arguments, result_type); } private: @@ -458,18 +449,6 @@ private: using NullMaps = std::pair; - struct ExecutionData - { - const IColumn& left; - const IColumn& right; - const ColumnArray::Offsets& offsets; - ColumnPtr result_column; - NullMaps maps; - ResultColumnPtr result { ResultColumnType::create() }; - - void moveResult() { result_column = std::move(result); } - }; - static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); @@ -574,23 +553,13 @@ private: } } -#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +#define INTEGRAL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { - if (const auto* const left_arr = checkAndGetColumn(arguments[0].column.get())) - { - if (checkAndGetColumn(&left_arr->getData())) - { - if (auto res = executeLowCardinality(arguments)) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); - } - } - ColumnPtr res; - if (!((res = executeIntegral(arguments)) + if (!((res = executeNothing(arguments)) + || (res = executeIntegral(arguments)) || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) @@ -599,6 +568,8 @@ private: return res; } +#undef INTEGRAL_PACK + /** * The Array's internal data type may be quite tricky 
(containing a Nullable type somewhere). To process the * Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one @@ -627,6 +598,14 @@ private: return {null_map_data, null_map_item}; } + struct ExecutionData + { + const IColumn & left; + const IColumn & right; + const ColumnArray::Offsets & offsets; + NullMaps null_maps; + }; + /** * Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters: * Integral s = {s1, s2, ...} @@ -635,39 +614,33 @@ private: template static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); - - if (!left) + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) return nullptr; - const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn& right = *right_converted_ptr.get(); - - ExecutionData data = { - left->getData(), - right, - left->getOffsets(), - nullptr, - getNullMaps(arguments) + ExecutionData data + { + .left = array->getData(), + .right = *arguments[1].column, + .offsets = array->getOffsets(), + .null_maps = getNullMaps(arguments), }; - if (executeIntegral(data)) - return data.result_column; - - return nullptr; + auto result = ResultColumnType::create(); + return executeIntegral(data, *result) ? std::move(result) : nullptr; } template - static bool executeIntegral(ExecutionData& data) + static bool executeIntegral(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralExpanded(data) || ...); + return (executeIntegralExpanded(data, result) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... template - static bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralImpl(data) || ...); + return (executeIntegralImpl(data, result) || ...); } /** @@ -676,40 +649,31 @@ private: * so we have to check all possible variants for #Initial and #Resulting types. 
*/ template - static bool executeIntegralImpl(ExecutionData& data) + static bool executeIntegralImpl(const ExecutionData & data, ResultColumnType & result) { - const ColumnVector * col_nested = checkAndGetColumn>(&data.left); - - if (!col_nested) + const auto * left_typed = checkAndGetColumn>(&data.left); + if (!left_typed) return false; - const auto [null_map_data, null_map_item] = data.maps; - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto item_arg_const = checkAndGetColumnConst>(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConst>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_const->template getValue(), - data.result->getData(), - null_map_data, + result.getData(), + data.null_maps.first, nullptr); - else if (const auto item_arg_vector = checkAndGetColumn>(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_vector->getData(), - data.result->getData(), - null_map_data, - null_map_item); + result.getData(), + data.null_maps.first, + data.null_maps.second); else return false; - data.moveResult(); return true; } @@ -724,227 +688,153 @@ private: * * Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 . */ - static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeArrayLowCardinality(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array_const = checkAndGetColumnConstData(arguments[0].column.get()); - if (!col_array) + if (!col_array && !col_array_const) return nullptr; - const ColumnLowCardinality * const col_lc = checkAndGetColumn(&col_array->getData()); + if (col_array_const) + col_array = col_array_const; - if (!col_lc) + const auto * left_lc = checkAndGetColumn(&col_array->getData()); + if (!left_lc) return nullptr; - const auto [null_map_data, null_map_item] = getNullMaps(arguments); + const auto * right_const = checkAndGetColumn(arguments[1].column.get()); + if (!right_const) + return nullptr; - if (const ColumnConst * col_arg_const = checkAndGetColumn(&*arguments[1].column)) + const auto & array_type = assert_cast(*arguments[0].type); + const auto target_type = recursiveRemoveLowCardinality(array_type.getNestedType()); + auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); + + UInt64 index = 0; + ResultColumnPtr col_result = ResultColumnType::create(); + + if (!right->isNullAt(0)) { - const IColumnUnique & col_lc_dict = col_lc->getDictionary(); + auto right_type = recursiveRemoveLowCardinality(arguments[1].type); + right = castColumn({right, right_type, ""}, target_type); - const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); - const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); + if (right->isNullable()) + right = checkAndGetColumn(*right).getNestedColumnPtr(); - ColumnPtr col_arg_cloned = castColumn( - {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr); + StringRef elem = right->getDataAt(0); + const auto & left_dict = left_lc->getDictionary(); - ResultColumnPtr col_result = 
ResultColumnType::create(); - UInt64 index = 0; - - if (!col_arg_cloned->isNullAt(0)) + if (std::optional maybe_index = left_dict.getOrFindValueIndex(elem); maybe_index) { - if (col_arg_cloned->isNullable()) - col_arg_cloned = checkAndGetColumn(*col_arg_cloned).getNestedColumnPtr(); - - StringRef elem = col_arg_cloned->getDataAt(0); - - if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) - { - index = *maybe_index; - } - else - { - const size_t offsets_size = col_array->getOffsets().size(); - auto & data = col_result->getData(); - - data.resize_fill(offsets_size); - - return col_result; - } + index = *maybe_index; } - - Impl::Main::vector( - col_lc->getIndexes(), - col_array->getOffsets(), - index, /** Assuming LowCardinality has index of NULL always as zero. */ - col_result->getData(), - null_map_data, - null_map_item); - - return col_result; - } - else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U - { - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) - const ColumnNullable & left_nullable = checkAndGetColumn(*left_casted); - - const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - - const IColumn & left_ptr = left_nullable.getNestedColumn(); - - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); - - const NullMap * const null_map_right_casted = right_nullable - ? &right_nullable->getNullMapColumn().getData() - : null_map_item; - - const IColumn& right_ptr = right_nullable - ? right_nullable->getNestedColumn() - : *right_casted.get(); - - ExecutionData data = + else { - left_ptr, right_ptr, - col_array->getOffsets(), - nullptr, - {null_map_left_casted, null_map_right_casted}}; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; + col_result->getData().resize_fill(col_array->size()); + return col_result; + } } - else // LowCardinality(T) and U, T not Nullable - { - if (arguments[1].column->isNullable()) - return nullptr; - - if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); - arg_lc && arg_lc->isNullable()) - return nullptr; - - // LowCardinality(T) and U (possibly LowCardinality(V)) - - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - - ExecutionData data = - { - *left_casted.get(), *right_casted.get(), col_array->getOffsets(), - nullptr, {null_map_data, null_map_item} - }; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; - } - - return nullptr; - } - - static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) - { - if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays - return executeIntegral(data); - - if (checkAndGetColumn(&data.left)) - return executeStringImpl(data); Impl::Main::vector( - data.left, - data.offsets, data.right, - data.result->getData(), - data.maps.first, data.maps.second); + left_lc->getIndexes(), + col_array->getOffsets(), + index, /** Assuming LowCardinality has index of NULL always as zero. 
*/ + col_result->getData(), + nullptr, + nullptr); - data.moveResult(); - return true; + return col_result; } -#undef INTEGRAL_TPL_PACK + ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + { + if constexpr (!std::is_same_v) + return nullptr; + + if (!isMap(arguments[0].type)) + return nullptr; + + auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); + + const auto & map_column = assert_cast(*non_const_map_column); + const auto & map_array_column = map_column.getNestedColumn(); + auto offsets = map_array_column.getOffsetsPtr(); + auto keys = map_column.getNestedData().getColumnPtr(0); + auto array_column = ColumnArray::create(keys, offsets); + + const auto & type_map = assert_cast(*arguments[0].type); + auto array_type = std::make_shared(type_map.getKeyType()); + + auto arguments_copy = arguments; + arguments_copy[0].column = std::move(array_column); + arguments_copy[0].type = std::move(array_type); + arguments_copy[0].name = arguments[0].name; + + return executeArrayImpl(arguments_copy, result_type); + } static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); - + const auto * array = checkAndGetColumn(arguments[0].column.get()); if (!array) return nullptr; - const ColumnString * left = checkAndGetColumn(&array->getData()); - + const auto * left = checkAndGetColumn(&array->getData()); if (!left) return nullptr; - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & right = *right_ptr.get(); + const auto & right = *arguments[1].column; + const auto [null_map_data, null_map_item] = getNullMaps(arguments); - ExecutionData data = { - *left, right, array->getOffsets(), - nullptr, getNullMaps(arguments), - std::move(ResultColumnType::create()) - }; + auto result = ResultColumnType::create(); - if (executeStringImpl(data)) - return data.result_column; - - return nullptr; - } - - static bool executeStringImpl(ExecutionData& data) - { - const auto [null_map_data, null_map_item] = data.maps; - const ColumnString& left = *typeid_cast(&data.left); - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto *const item_arg_const = checkAndGetColumnConstStringOrFixedString(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConstStringOrFixedString(&right)) { - const ColumnString * item_const_string = - checkAndGetColumn(&item_arg_const->getDataColumn()); - - const ColumnFixedString * item_const_fixedstring = - checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_string = checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_fixedstring = checkAndGetColumn(&item_arg_const->getDataColumn()); if (item_const_string) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_string->getChars(), item_const_string->getDataAt(0).size, - data.result->getData(), + result->getData(), null_map_data, null_map_item); else if (item_const_fixedstring) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_fixedstring->getChars(), item_const_fixedstring->getN(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); else - throw 
Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnConst contains not String nor FixedString column"); } - else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn(&right)) { Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); } else - return false; + { + return nullptr; + } - data.moveResult(); - return true; + return result; } static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) @@ -955,9 +845,7 @@ private: return nullptr; Array arr = col_array->getValue(); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn * item_arg = right_ptr.get(); + const IColumn * item_arg = arguments[1].column.get(); if (isColumnConst(*item_arg)) { @@ -1026,48 +914,59 @@ private: } } + static ColumnPtr executeNothing(const ColumnsWithTypeAndName & arguments) + { + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + return nullptr; + + if (arguments[1].column->onlyNull()) + { + auto result = ResultColumnType::create(); + Impl::Null::process(array->getOffsets(), result->getData(), getNullMaps(arguments).first); + return result; + } + + return nullptr; + } + static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * col = checkAndGetColumn(arguments[0].column.get()); - - if (!col) + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) return nullptr; DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); const DataTypePtr & index_type = arguments[1].type; - DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, index_type}); - - ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type); + DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, arguments[1].type}); + ColumnPtr col_nested = castColumn({ col_array->getDataPtr(), array_elements_type, "" }, common_type); + ColumnPtr item_arg = castColumn({ arguments[1].column, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg->onlyNull()) - Impl::Null::process( - col->getOffsets(), - col_res->getData(), - null_map_data); - else if (isColumnConst(*item_arg)) + if (const auto * item_arg_const = checkAndGetColumn(item_arg.get())) + { Impl::Main::vector( *col_nested, - col->getOffsets(), - typeid_cast(*item_arg).getDataColumn(), + col_array->getOffsets(), + item_arg_const->getDataColumn(), col_res->getData(), /// TODO This is wrong. 
null_map_data, nullptr); + } else + { Impl::Main::vector( *col_nested, - col->getOffsets(), + col_array->getOffsets(), *item_arg, col_res->getData(), null_map_data, null_map_item); + } return col_res; } diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.reference b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.sql b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql new file mode 100644 index 00000000000..3cb551804b7 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS 03199_fixedstring_array; +CREATE TABLE 03199_fixedstring_array (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory; +INSERT INTO 03199_fixedstring_array VALUES (['a', 'b']), (['c', 'd']); + +SELECT has(arr, toFixedString(materialize('a'), 1)) FROM 03199_fixedstring_array; + +DROP TABLE 03199_fixedstring_array; From e7e2b0953c0df57c95f6b5e0f6cc1afe5914c4e9 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 4 Jul 2024 14:50:51 +0000 Subject: [PATCH 379/439] Prevent another possible buffer overflow --- src/Functions/bitShiftLeft.cpp | 4 ++-- src/Functions/bitShiftRight.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 8e39ed86461..d561430d51f 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -42,7 +42,7 @@ struct BitShiftLeftImpl { const UInt8 word_size = 8 * sizeof(*pos); size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) @@ -110,7 +110,7 @@ struct BitShiftLeftImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 46cfcde8a33..05b8581c792 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -58,7 +58,7 @@ struct BitShiftRightImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) @@ -98,7 +98,7 @@ struct BitShiftRightImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if 
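For context on the `bit_limit` change above (this sketch is not part of the patch): for string operands the shift count is validated against the bit width of the value, and the limit is now computed in `UInt128` so that very long strings cannot overflow the check. The user-visible contract stays the same:

``` sql
-- FixedString(2) is 16 bits wide.
SELECT bitShiftLeft(toFixedString('ab', 2), 8);   -- within the width: fine
SELECT bitShiftLeft(toFixedString('ab', 2), 16);  -- exactly the width: fine, result is all zero bytes
SELECT bitShiftLeft(toFixedString('ab', 2), 17);  -- throws ARGUMENT_OUT_OF_BOUND
```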
(b == bit_limit) From 93afc8e6133365007488c4d8340f434f6e8a876f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 15:11:29 +0000 Subject: [PATCH 380/439] more precise warning message about sanitizers --- programs/server/Server.cpp | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f992fdc13a9..d51d959a42a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -625,6 +625,28 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { @@ -711,7 +733,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE From 24ff0f601d5b8d474429d67b5ed8702c662c58ec Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:15:32 +0200 Subject: [PATCH 381/439] update keeper bench example config file --- utils/keeper-bench/example.yaml | 67 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ connections: host: "localhost:9181" -generator: - setup: +setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" node: - name: "test3" + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 node: - name: "test_create" - node: - name: "test4" - node: - name: "test" - data: "somedata" - node: - repeat: 4 - name: - random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 + repeat: 2 node: repeat: 2 - node: - repeat: 2 - name: - random_string: - size: 12 name: random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 - node: - name: "test2" - data: "somedata" + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + +generator: requests: create: path: "/test_create" From e176587c592370cb78801d364ba428e9935877eb Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 17:16:25 +0200 Subject: [PATCH 382/439] Trying even worse fix --- docker/test/fasttest/Dockerfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
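As an aside to the warning-message change in `Server.cpp` above: these startup warnings are exposed through `system.warnings`, so the refined sanitizer text can be checked directly (assuming a sanitizer build of the server):

``` sql
SELECT message FROM system.warnings WHERE message LIKE 'Server was built with%';
```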
a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a818e01ccca..dba31525b78 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -36,10 +36,16 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake + # LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. # It's very dirty workaround, better to build compiler and LLVM ourself and use it. -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null ||: +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null \ + && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null \ + && cd /usr/lib/llvm-18/lib/clang/18/lib/linux && rename 's/.a.syms$/-aarch64.a.syms/' *.a.syms && rename 's/.a$/-aarch64.a/' *.a && rename 's/.so$/-aarch64.so/' *.so && rename 's/.o$/-aarch64.o/' *.o ||: + +RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null \ + && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null \ + && cd /usr/lib/llvm-18/lib/clang/18/lib/linux && rename 's/.a.syms$/-x86_64.a.syms/' *.a.syms && rename 's/.a$/-x86_64.a/' *.a && rename 's/.so$/-x86_64.so/' *.so && rename 's/.o$/-x86_64.o/' *.o ||: ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From c49d26bc239847f701b8d9942ed136b338b26024 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 4 Jul 2024 17:19:24 +0200 Subject: [PATCH 383/439] Avoid using source directory for generated files Signed-off-by: Azat Khuzhin --- contrib/aws-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index abde20addaf..250b47b7c2c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) -configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) From 2c9421812063cc22133f508e10033accb611d6d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 15:01:40 +0000 Subject: [PATCH 384/439] Random header fixes for libcxx 16 --- base/poco/Foundation/include/Poco/Logger.h | 2 ++ base/poco/Foundation/include/Poco/Message.h | 1 + 
src/Common/formatIPv6.h | 1 + src/Coordination/Changelog.h | 1 + src/Coordination/FourLetterCommand.h | 1 + src/Disks/ObjectStorages/MetadataOperationsHolder.h | 1 + src/IO/Archives/hasRegisteredArchiveFileExtension.cpp | 2 ++ src/Loggers/OwnSplitChannel.h | 1 + src/Storages/MergeTree/IPartMetadataManager.h | 1 + 9 files changed, 11 insertions(+) diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index 2a1cb33b407..74ddceea9dd 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include "Poco/Channel.h" diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index 9068e56a93c..756e427c5f5 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -19,6 +19,7 @@ #include +#include #include "Poco/Foundation.h" #include "Poco/Timestamp.h" diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bb83e0381ef..abeda95ed0d 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 2e8dbe75e90..c9b45d9a344 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 82b30a0b5f6..2a53bade62f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,6 +2,7 @@ #include "config.h" +#include #include #include #include diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 8997f40b9a2..a042f4bd8b9 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 2a979f500f7..407977f1f13 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -1,5 +1,7 @@ #include +#include +#include namespace DB { diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 7ca27cf6584..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index cef1d10e4ad..e817421f7d0 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From 14f54cb6e96066d90946a7e97ebd87b76160ab14 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 15:44:16 +0000 Subject: [PATCH 385/439] slightly better calculation of primary index --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 19 +++++- .../MergeTreeDataPartWriterOnDisk.cpp | 65 ++++++++++--------- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 +-- 3 files changed, 55 insertions(+), 38 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp 
b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 6152da78395..c87f66b64f3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. 
@@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. 
+ Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { From 6dd13dd34ab397d5e18d01820a064f18ed25595a Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:59:07 +0200 Subject: [PATCH 386/439] fix clean-up process --- utils/keeper-bench/Runner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && !children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); } From eb12d4e37e63212e831ccb92fc3c9818da7ba866 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Jul 2024 18:02:10 +0200 Subject: [PATCH 387/439] update time-window-functions --- .../functions/time-window-functions.md | 198 ++++++++++++++---- 1 file changed, 162 insertions(+), 36 deletions(-) diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2cec1987c20..bad545fc5a5 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -6,44 +6,120 @@ sidebar_label: Time Window # Time Window Functions -Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below: ## tumble A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). +**Syntax** + ``` sql tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). 
**Example** Query: ``` sql -SELECT tumble(now(), toIntervalDay('1')) +SELECT tumble(now(), toIntervalDay('1')); ``` Result: ``` text ┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │ └───────────────────────────────────────────────┘ ``` +## tumbleStart + +Returns the inclusive lower bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleStart(bounds_tuple); +tumbleStart(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleStart(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleStart(now(), toIntervalDay('1'))─┐ +│ 2024-07-04 00:00:00 │ +└────────────────────────────────────────┘ +``` + +## tumbleEnd + +Returns the exclusive upper bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleEnd(bounds_tuple); +tumbleEnd(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleEnd(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleEnd(now(), toIntervalDay('1'))─┐ +│ 2024-07-05 00:00:00 │ +└──────────────────────────────────────┘ +``` + ## hop -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. ``` sql hop(time_attr, hop_interval, window_interval [, timezone]) @@ -51,66 +127,116 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). 
+- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: **Example** Query: ``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); ``` Result: ``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart - -Returns the inclusive lower bound of the corresponding tumbling window. - -``` sql -tumbleStart(bounds_tuple); -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd - -Returns the exclusive upper bound of the corresponding tumbling window. - -``` sql -tumbleEnd(bounds_tuple); -tumbleEnd(time_attr, interval [, timezone]); +┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │ +└────────────────────────────────────────────────────┘ ``` ## hopStart -Returns the inclusive lower bound of the corresponding hopping window. +Returns the inclusive lower bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-03 00:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` ## hopEnd -Returns the exclusive upper bound of the corresponding hopping window. +Returns the exclusive upper bound of the corresponding [hopping window](#hop). 
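The `bounds_tuple` overloads of `tumbleStart`/`tumbleEnd`/`hopStart`/`hopEnd` listed in these docs are not shown in any example; a minimal illustration of that form (it simply unpacks the tuple returned by `tumble`/`hop`):

``` sql
SELECT
    tumbleStart(tumble(now(), toIntervalDay('1'))) AS lower_bound,
    tumbleEnd(tumble(now(), toIntervalDay('1')))   AS upper_bound;
```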
+ +**Syntax** ``` sql hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-05 00:00:00 │ +└───────────────────────────────────────────────────────┘ +``` ## Related content From c98b411edd34450c9954f8d086ae014fb80d1d8a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 16:11:12 +0000 Subject: [PATCH 388/439] fix tests --- .../0_stateless/02993_lazy_index_loading.reference | 2 +- .../03127_system_unload_primary_key_table.reference | 8 ++++---- .../0_stateless/03128_system_unload_primary_key.reference | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 From e7c0ff54c936fb619f89011719f47bd4ba27064b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 17:19:35 +0000 Subject: [PATCH 389/439] fix tests --- src/Functions/array/arrayIndex.h | 3 +++ .../queries/0_stateless/02010_array_index_bad_cast.reference | 3 +++ tests/queries/0_stateless/02010_array_index_bad_cast.sql | 5 +++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 067957f0d4b..111ab92b006 100644 --- 
a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -744,6 +744,9 @@ private: nullptr, nullptr); + if (col_array_const) + return ColumnConst::create(std::move(col_result), arguments[0].column->size()); + return col_result; } diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference index e69de29bb2d..e22493782f0 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.reference +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 14162e0d2e2..590e60eb42e 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,3 +1,4 @@ --- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). SET allow_suspicious_low_cardinality_types=1; -SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError ILLEGAL_COLUMN } +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14 00:00:01'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize(NULL)); From 78a2139f2a43752196a029995b6965ada359c954 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Jul 2024 19:27:10 +0200 Subject: [PATCH 390/439] restore timeouts, mark as no-fasttests --- .../queries/0_stateless/03172_error_log_table_not_empty.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 4b83400f5de..22a2fd82c64 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: this test relies on the timeouts, it always takes no less that 4 seconds to run CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,11 +14,12 @@ errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHER errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") -# Throw three random errors: 111, 222 and 333 and call flush logs to ensure system.error_log is flushed +# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " @@ -32,6 +35,7 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', 
toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " From df0cce24ee4f7e74b53c667cfc8b43a7e3e142ea Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 4 Jul 2024 19:34:47 +0200 Subject: [PATCH 391/439] CI: Fix sync pr merge --- tests/ci/sync_pr.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index 8251ccbaf38..1b71231f820 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -101,23 +101,20 @@ def main(): assert pr_info.merged_pr, "BUG. merged PR number could not been determined" prs = gh.get_pulls_from_search( - query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr is:open", repo="ClickHouse/clickhouse-private", ) - sync_pr = None - if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") elif len(prs) == 0: print("WARNING: No Sync PR found") else: sync_pr = prs[0] - - if args.merge: - merge_sync_pr(gh, sync_pr) - elif args.status: - set_sync_status(gh, pr_info, sync_pr) + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": From ad23d211b9a1c1c8dbe0ecec73fe6277f930b6e2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 4 Jul 2024 18:56:26 +0100 Subject: [PATCH 392/439] impl --- src/Common/BinStringDecodeHelper.h | 6 ++-- .../FunctionsBinaryRepresentation.cpp | 31 ++++++++++++----- src/Parsers/ExpressionElementParsers.cpp | 4 +-- .../03199_unbin_buffer_overflow.reference | 0 .../03199_unbin_buffer_overflow.sh | 33 +++++++++++++++++++ 5 files changed, 60 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/03199_unbin_buffer_overflow.reference create mode 100755 tests/queries/0_stateless/03199_unbin_buffer_overflow.sh diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h index df3e014cfad..03c175fd37f 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -5,7 +5,7 @@ namespace DB { -static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if ((end - pos) & 1) { @@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o ++out; } -static void inline binStringDecode(const char * pos, const char * end, char *& out) +static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if (pos == end) { @@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o ++out; } - assert((end - pos) % 8 == 0); + chassert((end - pos) % word_size == 0); while (end - pos != 0) { diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 0f3f8be96a7..ab10d402df4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -3,14 +3,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include 
+#include namespace DB { @@ -218,10 +218,7 @@ struct UnbinImpl static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; - static void decode(const char * pos, const char * end, char *& out) - { - binStringDecode(pos, end, out); - } + static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); } }; /// Encode number or string to string with binary or hexadecimal representation @@ -651,7 +648,15 @@ public: size_t size = in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + + size_t max_out_len = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1]) + - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; + max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1; + } + out_vec.resize(max_out_len); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -661,6 +666,7 @@ public: { size_t new_offset = in_offsets[i]; + /// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -668,6 +674,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; @@ -680,11 +689,11 @@ public: ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); - size_t n = col_fix_string->getN(); + const size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -694,6 +703,7 @@ public: { size_t new_offset = prev_offset + n; + /// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); out_offsets[i] = pos - begin; @@ -701,6 +711,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5997452bcf3..d4fc9a4bc4d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, if (hex) { - hexStringDecode(str_begin, str_end, res_pos); + hexStringDecode(str_begin, str_end, res_pos, word_size); } else { - binStringDecode(str_begin, str_end, res_pos); + binStringDecode(str_begin, str_end, res_pos, word_size); } return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference 
b/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh new file mode 100755 index 00000000000..337debebb14 --- /dev/null +++ b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# check for buffer overflow in unbin (due to not enough memory preallocate for output buffer) +# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done From 597810b69d7655b9049ded3b529d12a23996a770 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 18:46:09 +0000 Subject: [PATCH 393/439] Fix s390x build --- contrib/s2geometry-cmake/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 48562b8cead..5eabe71b538 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) -if (NOT ENABLE_S2_GEOMETRY) +# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged +if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X) message(STATUS "Not using S2 Geometry") return() endif() From 1d8389ddeefcc35c0fd04e3b0bb3cf99e55fdcbd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 19:27:25 +0000 Subject: [PATCH 394/439] Bump rocksdb to v6.23.3 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 3a0b80ca9d6..078fa563869 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 +Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c4220ba90ac..943e1d8acbd 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES}) +option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES}) if (NOT ENABLE_ROCKSDB) - message (STATUS "Not using rocksdb") + message (STATUS "Not using RocksDB") 
return() endif() From ffe1f8fea019f08de4a9a32f99c1ebce4baeae71 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 15:40:19 +0000 Subject: [PATCH 395/439] Bump Azure to 1.12 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 6262a76ef4c..92c94d7f37a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd +Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf From e428542b2ea7340d6314d76c6043134c356677a0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 18 Feb 2024 12:44:52 +0100 Subject: [PATCH 396/439] Add prometheus protobufs. --- contrib/CMakeLists.txt | 2 + .../prometheus-protobufs-cmake/CMakeLists.txt | 34 +++ contrib/prometheus-protobufs-gogo/LICENSE | 35 +++ contrib/prometheus-protobufs-gogo/README | 4 + .../gogoproto/gogo.proto | 145 +++++++++++++ contrib/prometheus-protobufs/LICENSE | 201 ++++++++++++++++++ contrib/prometheus-protobufs/README | 2 + .../prometheus-protobufs/prompb/remote.proto | 88 ++++++++ .../prometheus-protobufs/prompb/types.proto | 187 ++++++++++++++++ src/CMakeLists.txt | 4 + src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 12 files changed, 706 insertions(+) create mode 100644 contrib/prometheus-protobufs-cmake/CMakeLists.txt create mode 100644 contrib/prometheus-protobufs-gogo/LICENSE create mode 100644 contrib/prometheus-protobufs-gogo/README create mode 100644 contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto create mode 100644 contrib/prometheus-protobufs/LICENSE create mode 100644 contrib/prometheus-protobufs/README create mode 100644 contrib/prometheus-protobufs/prompb/remote.proto create mode 100644 contrib/prometheus-protobufs/prompb/types.proto diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 08f58335d16..90ae5981a21 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c) add_contrib (libssh-cmake libssh) +add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/prometheus-protobufs-cmake/CMakeLists.txt b/contrib/prometheus-protobufs-cmake/CMakeLists.txt new file mode 100644 index 00000000000..8c939902be7 --- /dev/null +++ b/contrib/prometheus-protobufs-cmake/CMakeLists.txt @@ -0,0 +1,34 @@ +option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF}) + +if(NOT ENABLE_PROMETHEUS_PROTOBUFS) + message(STATUS "Not using prometheus-protobufs") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") +set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs") +set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo") + +# Protobuf_IMPORT_DIRS specify where the protobuf compiler will look for .proto files. 
+set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) +list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}") + +PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers + "prompb/remote.proto" + "prompb/types.proto" + "gogoproto/gogo.proto" +) + +set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS}) + +# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") + +# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_CLANG_TIDY "") + +add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers}) +target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf) + +add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs) diff --git a/contrib/prometheus-protobufs-gogo/LICENSE b/contrib/prometheus-protobufs-gogo/LICENSE new file mode 100644 index 00000000000..16be18e5c50 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved. +Copyright (c) 2013, The GoGo Authors. All rights reserved. + +Protocol Buffers for Go with Gadgets + +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. +https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/prometheus-protobufs-gogo/README b/contrib/prometheus-protobufs-gogo/README new file mode 100644 index 00000000000..c40bc42df66 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/README @@ -0,0 +1,4 @@ +File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project: +https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto + +File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs. 
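
For orientation, the sketch below shows roughly how the C++ classes generated from these protobufs could be used to assemble a Prometheus remote-write payload. It is only an illustration of the message shapes declared in "prompb/types.proto" and "prompb/remote.proto" further down in this patch, not code from the patch itself; the include paths and the metric name are assumptions, and real code would fill the messages from query results rather than hard-coded values.

```cpp
// Illustrative only: builds a minimal remote-write request with one time series.
// Assumes the *.pb.h headers produced by PROTOBUF_GENERATE_CPP are on the include path.
#include "prompb/remote.pb.h"
#include "prompb/types.pb.h"

#include <string>

std::string buildExampleWriteRequest()
{
    prometheus::WriteRequest request;

    // A series is identified by its label set; "__name__" conventionally carries the metric name.
    prometheus::TimeSeries & series = *request.add_timeseries();
    prometheus::Label & name_label = *series.add_labels();
    name_label.set_name("__name__");
    name_label.set_value("example_metric");   // hypothetical metric name

    // One sample: a double value plus a timestamp in milliseconds since the epoch.
    prometheus::Sample & sample = *series.add_samples();
    sample.set_value(42.0);
    sample.set_timestamp(1720000000000);

    // A remote-write client would snappy-compress this payload and POST it to the endpoint.
    return request.SerializeAsString();
}
```

Note that the `(gogoproto.nullable) = false` options in these .proto files only influence the Go code generator; for the C++ classes compiled here they are effectively inert, which is presumably why it is enough to make "gogoproto/gogo.proto" importable as done in this patch.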
diff --git a/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto new file mode 100644 index 00000000000..974b36a7ccd --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto @@ -0,0 +1,145 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/cosmos/gogoproto +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/cosmos/gogoproto/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + 
optional bool goproto_registration = 63032; + optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 63035; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; + + optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed = 64035; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; + optional bool wktpointer = 65012; + + optional string castrepeated = 65013; +} diff --git a/contrib/prometheus-protobufs/LICENSE b/contrib/prometheus-protobufs/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/contrib/prometheus-protobufs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/prometheus-protobufs/README b/contrib/prometheus-protobufs/README new file mode 100644 index 00000000000..c557e59bb93 --- /dev/null +++ b/contrib/prometheus-protobufs/README @@ -0,0 +1,2 @@ +Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository: +https://github.com/prometheus/prometheus/tree/main/prompb diff --git a/contrib/prometheus-protobufs/prompb/remote.proto b/contrib/prometheus-protobufs/prompb/remote.proto new file mode 100644 index 00000000000..50bb25e7fac --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/remote.proto @@ -0,0 +1,88 @@ +// Copyright 2016 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "prompb/types.proto"; +import "gogoproto/gogo.proto"; + +message WriteRequest { + repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; + // Cortex uses this field to determine the source of the write request. + // We reserve it to avoid any compatibility issues. + reserved 2; + repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; +} + +// ReadRequest represents a remote read request. +message ReadRequest { + repeated Query queries = 1; + + enum ResponseType { + // Server will return a single ReadResponse message with matched series that includes list of raw samples. + // It's recommended to use streamed response types instead. + // + // Response headers: + // Content-Type: "application/x-protobuf" + // Content-Encoding: "snappy" + SAMPLES = 0; + // Server will stream a delimited ChunkedReadResponse message that + // contains XOR or HISTOGRAM(!) encoded chunks for a single series. + // Each message is following varint size and fixed size bigendian + // uint32 for CRC32 Castagnoli checksum. + // + // Response headers: + // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" + // Content-Encoding: "" + STREAMED_XOR_CHUNKS = 1; + } + + // accepted_response_types allows negotiating the content type of the response. + // + // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is + // implemented by server, error is returned. + // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used. + repeated ResponseType accepted_response_types = 2; +} + +// ReadResponse is a response when response_type equals SAMPLES. +message ReadResponse { + // In same order as the request's queries. + repeated QueryResult results = 1; +} + +message Query { + int64 start_timestamp_ms = 1; + int64 end_timestamp_ms = 2; + repeated prometheus.LabelMatcher matchers = 3; + prometheus.ReadHints hints = 4; +} + +message QueryResult { + // Samples within a time series must be ordered by time. + repeated prometheus.TimeSeries timeseries = 1; +} + +// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS. +// We strictly stream full series after series, optionally split by time. This means that a single frame can contain +// partition of the single series, but once a new series is started to be streamed it means that no more chunks will +// be sent for previous one. Series are returned sorted in the same way TSDB block are internally. +message ChunkedReadResponse { + repeated prometheus.ChunkedSeries chunked_series = 1; + + // query_index represents an index of the query from ReadRequest.queries these chunks relates to. 
+ int64 query_index = 2; +} diff --git a/contrib/prometheus-protobufs/prompb/types.proto b/contrib/prometheus-protobufs/prompb/types.proto new file mode 100644 index 00000000000..61fc1e0143e --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/types.proto @@ -0,0 +1,187 @@ +// Copyright 2017 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "gogoproto/gogo.proto"; + +message MetricMetadata { + enum MetricType { + UNKNOWN = 0; + COUNTER = 1; + GAUGE = 2; + HISTOGRAM = 3; + GAUGEHISTOGRAM = 4; + SUMMARY = 5; + INFO = 6; + STATESET = 7; + } + + // Represents the metric type, these match the set from Prometheus. + // Refer to github.com/prometheus/common/model/metadata.go for details. + MetricType type = 1; + string metric_family_name = 2; + string help = 4; + string unit = 5; +} + +message Sample { + double value = 1; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 2; +} + +message Exemplar { + // Optional, can be empty. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + double value = 2; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 3; +} + +// A native histogram, also known as a sparse histogram. +// Original design doc: +// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit +// The appendix of this design doc also explains the concept of float +// histograms. This Histogram message can represent both, the usual +// integer histogram as well as a float histogram. +message Histogram { + enum ResetHint { + UNKNOWN = 0; // Need to test for a counter reset explicitly. + YES = 1; // This is the 1st histogram after a counter reset. + NO = 2; // There was no counter reset between this and the previous Histogram. + GAUGE = 3; // This is a gauge histogram where counter resets don't happen. + } + + oneof count { // Count of observations in the histogram. + uint64 count_int = 1; + double count_float = 2; + } + double sum = 3; // Sum of observations in the histogram. + // The schema defines the bucket schema. Currently, valid numbers + // are -4 <= n <= 8. They are all for base-2 bucket schemas, where 1 + // is a bucket boundary in each case, and then each power of two is + // divided into 2^n logarithmic buckets. Or in other words, each + // bucket boundary is the previous boundary times 2^(2^-n). In the + // future, more bucket schemas may be added using numbers < -4 or > + // 8. + sint32 schema = 4; + double zero_threshold = 5; // Breadth of the zero bucket. + oneof zero_count { // Count in zero bucket. + uint64 zero_count_int = 6; + double zero_count_float = 7; + } + + // Negative Buckets. 
+ repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false]; + // Use either "negative_deltas" or "negative_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double negative_counts = 10; // Absolute count of each bucket. + + // Positive Buckets. + repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false]; + // Use either "positive_deltas" or "positive_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double positive_counts = 13; // Absolute count of each bucket. + + ResetHint reset_hint = 14; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 15; +} + +// A BucketSpan defines a number of consecutive buckets with their +// offset. Logically, it would be more straightforward to include the +// bucket counts in the Span. However, the protobuf representation is +// more compact in the way the data is structured here (with all the +// buckets in a single array separate from the Spans). +message BucketSpan { + sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative). + uint32 length = 2; // Length of consecutive buckets. +} + +// TimeSeries represents samples and labels for a single time series. +message TimeSeries { + // For a timeseries to be valid, and for the samples and exemplars + // to be ingested by the remote system properly, the labels field is required. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + repeated Sample samples = 2 [(gogoproto.nullable) = false]; + repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false]; + repeated Histogram histograms = 4 [(gogoproto.nullable) = false]; +} + +message Label { + string name = 1; + string value = 2; +} + +message Labels { + repeated Label labels = 1 [(gogoproto.nullable) = false]; +} + +// Matcher specifies a rule, which can match or set of labels or not. +message LabelMatcher { + enum Type { + EQ = 0; + NEQ = 1; + RE = 2; + NRE = 3; + } + Type type = 1; + string name = 2; + string value = 3; +} + +message ReadHints { + int64 step_ms = 1; // Query step size in milliseconds. + string func = 2; // String representation of surrounding function or aggregation. + int64 start_ms = 3; // Start time in milliseconds. + int64 end_ms = 4; // End time in milliseconds. + repeated string grouping = 5; // List of label names used in aggregation. + bool by = 6; // Indicate whether it is without or by. + int64 range_ms = 7; // Range vector selector range in milliseconds. +} + +// Chunk represents a TSDB chunk. +// Time range [min, max] is inclusive. +message Chunk { + int64 min_time_ms = 1; + int64 max_time_ms = 2; + + // We require this to match chunkenc.Encoding. + enum Encoding { + UNKNOWN = 0; + XOR = 1; + HISTOGRAM = 2; + FLOAT_HISTOGRAM = 3; + } + Encoding type = 3; + bytes data = 4; +} + +// ChunkedSeries represents single, encoded time series. +message ChunkedSeries { + // Labels should be sorted. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + // Chunks will be in start time order and may overlap. 
+ repeated Chunk chunks = 2 [(gogoproto.nullable) = false]; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b18207e55ad..d985595154c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() +if (TARGET ch_contrib::prometheus_protobufs) + dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs) +endif() + if (TARGET ch_rust::skim) dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index ad2ca2652d1..f68701d5d10 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -63,6 +63,7 @@ #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT +#cmakedefine01 USE_PROMETHEUS_PROTOBUFS /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/configure_config.cmake b/src/configure_config.cmake index a3f6dae4b87..75f61baa854 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -170,5 +170,8 @@ endif() if (TARGET ch_contrib::pocketfft) set(USE_POCKETFFT 1) endif() +if (TARGET ch_contrib::prometheus_protobufs) + set(USE_PROMETHEUS_PROTOBUFS 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) From 1ef5bca59164bd4ec00743e0f6a5d7cf17c077ef Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 18 Feb 2024 12:46:21 +0100 Subject: [PATCH 397/439] Fix cmake function PROTOBUF_GENERATE_CPP(): now it returns correct paths in SRCS and HDRS even if input ".proto" files are located in sibling directories. --- .../protobuf_generate.cmake | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/contrib/google-protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake index 3e30b4e40fd..0731a81aeb8 100644 --- a/contrib/google-protobuf-cmake/protobuf_generate.cmake +++ b/contrib/google-protobuf-cmake/protobuf_generate.cmake @@ -157,15 +157,13 @@ function(protobuf_generate) set(_generated_srcs_all) foreach(_proto ${protobuf_generate_PROTOS}) - get_filename_component(_abs_file ${_proto} ABSOLUTE) - get_filename_component(_abs_dir ${_abs_file} DIRECTORY) - get_filename_component(_basename ${_proto} NAME_WE) - file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) - - set(_possible_rel_dir) - if (NOT protobuf_generate_APPEND_PATH) - set(_possible_rel_dir ${_rel_dir}/) - endif() + # The protobuf compiler doesn't return paths to the files it generates so we have to calculate those paths here: + # _abs_file - absolute path to a .proto file, + # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS, + # _basename - filename of the .proto file (without path and without extenstion). 
+ get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path}) + get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path}) + get_filename_component(_basename "${_abs_file}" NAME_WE) set(_generated_srcs) foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) @@ -173,7 +171,7 @@ function(protobuf_generate) endforeach() if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc") set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") list(APPEND _generated_srcs ${_descriptor_file}) endif() @@ -196,3 +194,36 @@ function(protobuf_generate) target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) endif() endfunction() + +# Calculates the absolute path to a .proto file. +function(get_proto_absolute_path result proto) + cmake_path(IS_ABSOLUTE proto _is_abs_path) + if(_is_abs_path) + set(${result} "${proto}" PARENT_SCOPE) + return() + endif() + foreach(_include_dir ${ARGN}) + if(EXISTS "${_include_dir}/${proto}") + set(${result} "${_include_dir}/${proto}" PARENT_SCOPE) + return() + endif() + endforeach() + message(SEND_ERROR "Not found protobuf ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() + +# Calculates a relative path to a .proto file. The returned path is relative to one of include directories. +function(get_proto_relative_path result abs_path) + set(${result} "" PARENT_SCOPE) + get_filename_component(_abs_dir "${abs_path}" DIRECTORY) + foreach(_include_dir ${ARGN}) + cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix) + if(_is_prefix) + file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}") + if(NOT _rel_dir STREQUAL "") + set(${result} "${_rel_dir}/" PARENT_SCOPE) + endif() + return() + endif() + endforeach() + message(WARNING "Not found protobuf ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() From d777a7a9415c9f630e8df01cd42fc0d424cd4daf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Jul 2024 08:38:09 +0100 Subject: [PATCH 398/439] Reduce one more time --- src/Core/ServerSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6c62ab6def8..d473810bcb8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0.0005, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp From 1f07f33bd259e2fc8b6ff930f3eb8dd72d255865 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 5 Jul 2024 12:18:18 +0200 Subject: [PATCH 399/439] Less dirty still hack --- docker/test/fasttest/Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index dba31525b78..cdbfc3f0beb 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -38,14 +38,8 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake # LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. -# It's very dirty workaround, better to build compiler and LLVM ourself and use it. -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu 2>/dev/null 1>/dev/null \ - && ln -s /usr/lib/llvm-18/lib/clang/18/lib/aarch64-unknown-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null \ - && cd /usr/lib/llvm-18/lib/clang/18/lib/linux && rename 's/.a.syms$/-aarch64.a.syms/' *.a.syms && rename 's/.a$/-aarch64.a/' *.a && rename 's/.so$/-aarch64.so/' *.so && rename 's/.o$/-aarch64.o/' *.o ||: - -RUN ls /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu 2>/dev/null 1>/dev/null \ - && ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/linux 2>/dev/null \ - && cd /usr/lib/llvm-18/lib/clang/18/lib/linux && rename 's/.a.syms$/-x86_64.a.syms/' *.a.syms && rename 's/.a$/-x86_64.a/' *.a && rename 's/.so$/-x86_64.so/' *.so && rename 's/.o$/-x86_64.o/' *.o ||: +# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || mv /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From fddbe5222732ffee676457dfe76f88c44791e043 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jul 2024 10:32:53 +0000 Subject: [PATCH 400/439] Disable merge filters optimization by default. 
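
The new `query_plan_merge_filters` setting is disabled by default because collapsing two
FilterSteps into one can break short-circuit evaluation of the combined condition. A minimal
sketch of opting back in per query (it mirrors the existing 01655 test; the numbers() size and
aliases are illustrative):

    SET query_plan_merge_filters = 1;
    EXPLAIN actions = 1
    SELECT *
    FROM (SELECT sum(number) AS v, bitAnd(number, 15) AS key FROM numbers(1e6) GROUP BY key HAVING v != 0)
    WHERE key = 7;

With the setting enabled the plan should contain a single merged Filter step instead of the
separate HAVING and WHERE filters shown in the updated reference files below.
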
--- src/Core/Settings.h | 1 + .../QueryPlan/Optimizations/Optimizations.h | 7 +++- .../QueryPlanOptimizationSettings.cpp | 2 ++ .../QueryPlanOptimizationSettings.h | 3 ++ .../Optimizations/mergeExpressions.cpp | 19 ++++++++-- .../01655_plan_optimizations.reference | 1 + ...01655_plan_optimizations_merge_filters.sql | 2 ++ .../02496_remove_redundant_sorting.reference | 13 +++---- ...rouping_sets_predicate_push_down.reference | 36 +++++++++++-------- 9 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..591cf2be4c9 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -758,6 +758,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ + M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b1ab5561958..c48bdf1552a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -46,6 +46,10 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Replace chain `FilterStep -> FilterStep` to single FilterStep +/// Note: this breaks short-circuit logic, so it is disabled for now. +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. 
size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); @@ -81,11 +85,12 @@ size_t tryAggregatePartitionsIndependently(QueryPlan::Node * node, QueryPlan::No inline const auto & getOptimizations() { - static const std::array optimizations = {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::lift_up_array_join}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::push_down_limit}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::split_filter}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::merge_expressions}, + {tryMergeFilters, "mergeFilters", &QueryPlanOptimizationSettings::merge_filters}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryConvertOuterJoinToInnerJoin, "convertOuterJoinToInnerJoin", &QueryPlanOptimizationSettings::convert_outer_join_to_inner_join}, {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::execute_functions_after_sorting}, diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 2738de1ff5f..4d984133efd 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -20,6 +20,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.merge_expressions = from.query_plan_enable_optimizations && from.query_plan_merge_expressions; + settings.merge_filters = from.query_plan_enable_optimizations && from.query_plan_merge_filters; + settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; settings.convert_outer_join_to_inner_join = from.query_plan_enable_optimizations && from.query_plan_convert_outer_join_to_inner_join; diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 85042cea4ed..539ff2eafbb 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -31,6 +31,9 @@ struct QueryPlanOptimizationSettings /// If merge-expressions optimization is enabled. bool merge_expressions = true; + /// If merge-filters optimization is enabled. + bool merge_filters = false; + /// If filter push down optimization is enabled. 
bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..118abdd701f 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -34,7 +34,6 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); - auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -76,7 +75,23 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } - else if (parent_filter && child_filter) + + return 0; +} +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_filter = typeid_cast(parent.get()); + auto * child_filter = typeid_cast(child.get()); + + if (parent_filter && child_filter) { const auto & child_actions = child_filter->getExpression(); const auto & parent_actions = parent_filter->getExpression(); diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index a6af1f2170d..edf93b4b39f 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -163,6 +163,7 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter +Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql index 1301135b4cb..2193fc7a8f4 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -1,3 +1,5 @@ +set query_plan_merge_filters=1; + set allow_experimental_analyzer=1; select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4a4e898c5bd..77ef213b36d 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,12 +332,13 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + 
(Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 70bcd7f255b..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,16 +29,20 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform × 2 + (Filter) + FilterTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -64,10 +68,14 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform + (Filter) + FilterTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) From e084496d5f0d1dc2c60c84c5b9d1d9c2aa53fa21 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jul 2024 10:35:19 +0000 Subject: [PATCH 401/439] Add a test. 
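
The test covers the case that motivated disabling filter merging by default: a LEFT JOIN whose
right side is a filtered subquery, run both with and without enable_multiple_prewhere_read_steps
and repeated after OPTIMIZE TABLE ... FINAL. A sketch of running it locally (assumes a server
built from this branch is already running with default connection settings):

    tests/clickhouse-test 03199_merge_filters_bug
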
--- .../03199_merge_filters_bug.reference | 0 .../0_stateless/03199_merge_filters_bug.sql | 70 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 tests/queries/0_stateless/03199_merge_filters_bug.reference create mode 100644 tests/queries/0_stateless/03199_merge_filters_bug.sql diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.reference b/tests/queries/0_stateless/03199_merge_filters_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql new file mode 100644 index 00000000000..ed2ec2ea217 --- /dev/null +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -0,0 +1,70 @@ +drop table if exists t1; +drop table if exists t2; + +CREATE TABLE t1 +( + `s1` String, + `s2` String, + `s3` String +) +ENGINE = MergeTree +ORDER BY tuple(); + + +CREATE TABLE t2 +( + `fs1` FixedString(10), + `fs2` FixedString(10) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t1 SELECT + repeat('t', 15) s1, + 'test' s2, + 'test' s3; + +INSERT INTO t1 SELECT + substring(s1, 1, 10), + s2, + s3 +FROM generateRandom('s1 String, s2 String, s3 String') +LIMIT 10000; + +INSERT INTO t2 SELECT * +FROM generateRandom() +LIMIT 10000; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + +optimize table t1 final; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')); From b189902a365e6f86759e0b1c4b64b852d3aaf843 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 5 Jul 2024 10:50:22 +0000 Subject: [PATCH 402/439] Update version_date.tsv and changelogs after v24.6.2.17-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.6.2.17-stable.md | 26 ++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.6.2.17-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 018fe57bf56..c59ef1b919a 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a86406e5129..240df79aeb1 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 25f3273a648..ac64655991a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i 
"s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.6.2.17-stable.md b/docs/changelogs/v24.6.2.17-stable.md new file mode 100644 index 00000000000..820937f6291 --- /dev/null +++ b/docs/changelogs/v24.6.2.17-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478) + +#### New Feature +* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8112ed9083b..271065a78fb 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 @@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03 From 23f3f36207c567427b70fc84a0557cd5ebcc4d31 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jul 2024 11:18:10 +0000 Subject: [PATCH 403/439] update settings history. --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 60da43afbde..ddcfbb2eab5 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -62,6 +62,7 @@ static std::initializer_list Date: Fri, 5 Jul 2024 14:58:17 +0200 Subject: [PATCH 404/439] Revert "insertion deduplication on retries for materialised views" --- src/Columns/ColumnObject.cpp | 6 - src/Columns/ColumnObject.h | 2 +- src/Common/CollectionOfDerived.h | 184 ---- src/Core/Settings.h | 6 +- src/Interpreters/AsynchronousInsertQueue.cpp | 19 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 9 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/InterpreterInsertQuery.cpp | 682 ++++++------- src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/Squashing.cpp | 124 ++- src/Interpreters/Squashing.h | 50 +- src/Interpreters/SystemLog.cpp | 8 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 58 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 10 +- .../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 7 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 5 +- src/Processors/Sinks/SinkToStorage.h | 5 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 6 +- .../AggregatingInOrderTransform.cpp | 9 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/ApplySquashingTransform.h | 14 +- .../Transforms/CountingTransform.cpp | 3 +- .../DeduplicationTokenTransforms.cpp | 236 ----- .../Transforms/DeduplicationTokenTransforms.h | 237 ----- .../Transforms/ExpressionTransform.cpp | 2 - .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 6 +- .../Transforms/MaterializingTransform.cpp | 1 - .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/PlanSquashingTransform.cpp | 15 +- .../Transforms/PlanSquashingTransform.h | 
3 +- .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingTransform.cpp | 18 +- .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 127 +-- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 - src/Server/TCPHandler.cpp | 20 +- src/Storages/Distributed/DistributedSink.cpp | 20 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 18 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 68 +- src/Storages/MergeTree/MergeTreeSink.h | 3 +- src/Storages/MergeTree/MutateTask.cpp | 19 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 94 +- .../MergeTree/ReplicatedMergeTreeSink.h | 13 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 8 +- .../StorageObjectStorageSink.cpp | 4 +- .../ObjectStorage/StorageObjectStorageSink.h | 2 +- .../StorageObjectStorageQueue.cpp | 8 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- .../RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +- .../RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/StorageBuffer.cpp | 10 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 8 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 59 +- src/Storages/WindowView/StorageWindowView.h | 2 +- .../test_force_deduplication/test.py | 85 +- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- ...lized_view_and_too_many_parts_zookeeper.sh | 4 +- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 - ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- ...02912_ingestion_mv_deduplication.reference | 5 +- .../02912_ingestion_mv_deduplication.sql | 5 +- .../0_stateless/03008_deduplication.python | 657 ------------ ...08_deduplication_cases_from_docs.reference | 41 - .../03008_deduplication_cases_from_docs.sql | 331 ------ ...on_insert_into_partitioned_table.reference | 35 - ...lication_insert_into_partitioned_table.sql | 83 -- ...ert_several_blocks_nonreplicated.reference | 962 
------------------ ...ion_insert_several_blocks_nonreplicated.sh | 59 -- ...insert_several_blocks_replicated.reference | 962 ------------------ ...cation_insert_several_blocks_replicated.sh | 59 -- ...tes_several_blocks_nonreplicated.reference | 962 ------------------ ..._generates_several_blocks_nonreplicated.sh | 59 -- ...erates_several_blocks_replicated.reference | 962 ------------------ ..._mv_generates_several_blocks_replicated.sh | 59 -- ..._mv_into_one_table_nonreplicated.reference | 706 ------------- ...several_mv_into_one_table_nonreplicated.sh | 59 -- ...ral_mv_into_one_table_replicated.reference | 706 ------------- ...on_several_mv_into_one_table_replicated.sh | 59 -- .../03035_max_insert_threads_support.sh | 2 +- 138 files changed, 865 insertions(+), 8646 deletions(-) delete mode 100644 src/Common/CollectionOfDerived.h delete mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.cpp delete mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.h delete mode 100644 tests/queries/0_stateless/03008_deduplication.python delete mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference delete mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..90ef974010c 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,10 +1093,4 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } -void ColumnObject::updateHashFast(SipHash & hash) const -{ - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); -} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..e2936b27994 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const 
override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; + void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h deleted file mode 100644 index 97c0c3fbc06..00000000000 --- a/src/Common/CollectionOfDerived.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* This is a collections of objects derived from ItemBase. -* Collection contains no more than one instance for each derived type. -* The derived type is used to access the instance. -*/ - -template -class CollectionOfDerivedItems -{ -public: - using Self = CollectionOfDerivedItems; - using ItemPtr = std::shared_ptr; - -private: - struct Rec - { - std::type_index type_idx; - ItemPtr ptr; - - bool operator<(const Rec & other) const - { - return type_idx < other.type_idx; - } - - bool operator<(const std::type_index & value) const - { - return type_idx < value; - } - - bool operator==(const Rec & other) const - { - return type_idx == other.type_idx; - } - }; - using Records = std::vector; - -public: - void swap(Self & other) noexcept - { - records.swap(other.records); - } - - void clear() - { - records.clear(); - } - - bool empty() const - { - return records.empty(); - } - - size_t size() const - { - return records.size(); - } - - Self clone() const - { - Self result; - result.records.reserve(records.size()); - for (const auto & rec : records) - result.records.emplace_back(rec.type_idx, rec.ptr->clone()); - return result; - } - - void append(Self && other) - { - auto middle_idx = records.size(); - std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); - chassert(isUniqTypes()); - } - - template - void add(std::shared_ptr info) - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - return addImpl(std::type_index(typeid(T)), std::move(info)); - } - - template - std::shared_ptr get() const - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - return cast; - } - - template - std::shared_ptr extract() - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - - records.erase(it); - return cast; - } - - std::string debug() const - { - std::string result; - - for (auto & rec : records) - { - result.append(rec.type_idx.name()); - result.append(" "); - } - - return result; - } - -private: - bool isUniqTypes() const - { - auto uniq_it = std::adjacent_find(records.begin(), records.end()); - - return uniq_it == records.end(); - } - - void addImpl(std::type_index type_idx, ItemPtr item) - { - auto it = std::lower_bound(records.begin(), 
records.end(), type_idx); - - if (it == records.end()) - { - records.emplace_back(type_idx, item); - return; - } - - if (it->type_idx == type_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); - - - records.emplace(it, type_idx, item); - - chassert(isUniqTypes()); - } - - Records::const_iterator getImpl(std::type_index type_idx) const - { - auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); - - if (it == records.cend()) - return records.cend(); - - if (it->type_idx != type_idx) - return records.cend(); - - return it; - } - - Records records; -}; - -} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 81d0aa0c51d..5903dbd32eb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -634,8 +634,9 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -952,7 +953,6 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dd1166a9228..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter( - query, - query_context, - query_context->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_insert */ false); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -787,12 +781,7 @@ try try { interpreter = std::make_unique( - key.query, - insert_context, - key.settings.insert_allow_materialized_columns, - false, - false, - true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } @@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 81bb6290acb..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -23,7 +22,6 @@ #include #include -#include #include #include #include @@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfoCloneable +class 
TableCheckTask : public ChunkInfo { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -112,12 +110,6 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } - TableCheckTask(const TableCheckTask & other) - : table(other.table) - , check_data_tasks(other.check_data_tasks) - , is_finished(other.is_finished.load()) - {} - std::optional checkNext() const { if (isFinished()) @@ -129,8 +121,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr tmp = check_data_tasks; - auto result = table->checkDataNext(tmp); + IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; + auto result = table->checkDataNext(check_data_tasks_); is_finished = !result.has_value(); return result; } @@ -188,7 +180,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.getChunkInfos().add(std::move(current_check_task)); + result.setChunkInfo(std::move(current_check_task)); return result; } @@ -288,7 +280,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = chunk.getChunkInfos().get(); + auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ee191c02ff8..0ee2bb6c0e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery( - insert, - getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false).execute(); + return InterpreterInsertQuery(insert, getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 26b7e074fdf..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert( - ast.getExplainedQuery(), - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +38,6 @@ #include #include #include -#include "base/defines.h" namespace ProfileEvents @@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t 
presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) -{ - chassert(presink_streams > 0); - chassert(sink_streams > 0); - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - - std::vector sink_chains; - std::vector presink_chains; - - for (size_t i = 0; i < sink_streams; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - - for (size_t i = 0; i < presink_streams; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } - - return {std::move(presink_chains), std::move(sink_chains)}; -} - - -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - ContextPtr select_context = getContext(); - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. 
- */ - - Settings new_settings = select_context->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto context_for_trivial_select = Context::createCopy(context); - context_for_trivial_select->setSettings(new_settings); - context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - select_context = context_for_trivial_select; - } - - QueryPipelineBuilder pipeline; - - { - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - { - query_sample_block.setColumn( - col_idx, - ColumnWithTypeAndName( - makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), - makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), - query_columns[col_idx].name)); - } - } - } - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. 
- pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - size_t num_select_threads = pipeline.getNumThreads(); - - pipeline.resize(1); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - - size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - - if (!settings.parallel_view_processing) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; - } - - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - pipeline.resize(presink_chains.size()); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - for (auto & chain : presink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(presink_chains)); - - pipeline.resize(sink_streams_size); - - for (auto & chain : sink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) - { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. 
- if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); - } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); -} - - -QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - Chain chain; - - { - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - /* presink_streams */1, /* sink_streams */1, - table, metadata_snapshot, query_sample_block); - - chain = std::move(presink_chains.front()); - chain.appendChain(std::move(sink_chains.front())); - } - - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - - chain.addSource(std::make_shared(chain.getInputHeader())); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - QueryPipeline pipeline = QueryPipeline(std::move(chain)); - - pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); - pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - pipeline.complete(std::move(pipe)); - } - - return pipeline; -} - - BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + QueryPipelineBuilder pipeline; + std::optional distributed_pipeline; + QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (!allow_materialized) + if (query.select && settings.parallel_distributed_insert_select) + // Distributed INSERT SELECT + distributed_pipeline = table->distributedWrite(query, getContext()); + + std::vector presink_chains; + std::vector sink_chains; + if (!distributed_pipeline) { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + /// * If it's not an INSERT SELECT, forget all that and use one stream. 
+ size_t pre_streams_size = 1; + size_t sink_streams_size = 1; + + if (query.select) + { + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + Settings new_settings = getContext()->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + else + { + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + if (settings.max_insert_threads > 1) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); + pre_streams_size = resize_to_max_insert_threads ? 
settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + + if (table->supportsParallelInsert()) + sink_streams_size = pre_streams_size; + } + + pipeline.resize(pre_streams_size); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + } + } + } + } + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + for (size_t i = 0; i < sink_streams_size; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); + } + for (size_t i = 0; i < pre_streams_size; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } } BlockIO res; - if (query.select) + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
+ if (distributed_pipeline) { - if (settings.parallel_distributed_insert_select) + res.pipeline = std::move(*distributed_pipeline); + } + else if (query.select) + { + const auto & header = presink_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - auto distributed = table->distributedWrite(query, getContext()); - if (distributed) - { - res.pipeline = std::move(*distributed); - } - else - { - res.pipeline = buildInsertSelectPipeline(query, table); - } - } - else + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - res.pipeline = buildInsertSelectPipeline(query, table); + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); } + + size_t num_select_threads = pipeline.getNumThreads(); + + for (auto & chain : presink_chains) + resources = chain.detachResources(); + for (auto & chain : sink_chains) + resources = chain.detachResources(); + + pipeline.addChains(std::move(presink_chains)); + pipeline.resize(sink_chains.size()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - res.pipeline = buildInsertPipeline(query, table); + auto & chain = presink_chains.at(0); + chain.appendChain(std::move(sink_chains.at(0))); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + res.pipeline = QueryPipeline(std::move(presink_chains[0])); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } } - res.pipeline.addStorageHolder(table); + res.pipeline.addResources(std::move(resources)); - if (const auto * mv = dynamic_cast(table.get())) - res.pipeline.addStorageHolder(mv->getTargetTable()); + res.pipeline.addStorageHolder(table); + if (inner_table) + res.pipeline.addStorageHolder(inner_table); return res; } @@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } - void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } - void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, - /* no_destination */false, - /* async_insert */false); + return std::make_unique(args.query, args.context, args.allow_materialized); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } - - } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 
894c7c42144..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_, - bool no_squash_, - bool no_destination, - bool async_insert_); + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,17 +73,12 @@ private: ASTPtr query_ptr; const bool allow_materialized; - bool no_squash = false; - bool no_destination = false; + const bool no_squash; + const bool no_destination; const bool async_insert; std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - - QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); - QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); - Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -12,33 +11,24 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(header_) { } Chunk Squashing::flush() { - if (!accumulated) - return {}; - - auto result = convertToChunk(accumulated.extract()); - chassert(result); - return result; + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk.hasChunkInfo()) return Chunk(); - auto squash_info = input_chunk.getChunkInfos().extract(); - - if (!squash_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk)) + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) { /// If no accumulated data, return just read block. - if (!accumulated) + if (chunks_to_merge_vec.empty()) { - accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. 
- Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize()) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - accumulated.add(std::move(input_chunk)); + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize()) - return convertToChunk(accumulated.extract()); - + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } return {}; } @@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); - aggr_chunk.getChunkInfos().add(std::move(info)); - chassert(aggr_chunk); - return aggr_chunk; + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); } -Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) +Chunk Squashing::squash(std::vector & input_chunks) { + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} - Chunk result; - result.setColumns(std::move(mutable_columns), rows); - result.setChunkInfos(infos); - result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); - chassert(result); - return result; + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= 
min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -bool Squashing::isEnoughSize() const -{ - return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); -}; - -bool Squashing::isEnoughSize(const Chunk & chunk) const -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - -void Squashing::CurrentSize::add(Chunk && chunk) -{ - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - chunks.push_back(std::move(chunk)); -} - -std::vector Squashing::CurrentSize::extract() -{ - auto result = std::move(chunks); - *this = {}; - return result; -} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,18 +8,9 @@ namespace DB { -class ChunksToSquash : public ChunkInfoCloneable +struct ChunksToSquash : public ChunkInfo { -public: - ChunksToSquash() = default; - ChunksToSquash(const ChunksToSquash & other) - { - chunks.reserve(other.chunks.size()); - for (const auto & chunk: other.chunks) - chunks.push_back(chunk.clone()); - } - - std::vector chunks = {}; + mutable std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -45,35 +36,32 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - void setHeader(Block header_) { header = std::move(header_); } - const Block & getHeader() const { return header; } - -private: - class CurrentSize + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize { - std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - - public: - explicit operator bool () const { return !chunks.empty(); } - size_t getRows() const { return rows; } - size_t getBytes() const { return bytes; } - void add(Chunk && chunk); - std::vector extract(); }; - const size_t min_block_size_rows; - const size_t min_block_size_bytes; - Block header; + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; - CurrentSize accumulated; + CurrentSize accumulated_size; - static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - bool isEnoughSize() const; + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index f386e157b14..557065b23ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -538,13 +538,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter( - query_ptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ce6f5e454e..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ 
bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolumns in unknown required columns. + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4466be5b3a7..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + Chunk Chunk::clone() const { - auto tmp = Chunk(getColumns(), getNumRows()); - tmp.setChunkInfos(chunk_infos.clone()); - return tmp; + return Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1348966c0d3..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,9 +1,7 @@ #pragma once -#include #include - -#include +#include namespace DB { @@ -11,29 +9,11 @@ namespace DB class ChunkInfo { public: - using Ptr = std::shared_ptr; - - ChunkInfo() = default; - ChunkInfo(const ChunkInfo&) = default; - ChunkInfo(ChunkInfo&&) = default; - - virtual Ptr clone() const = 0; virtual ~ChunkInfo() = default; + ChunkInfo() = default; }; - -template -class ChunkInfoCloneable : public ChunkInfo -{ -public: - ChunkInfoCloneable() = default; - ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; - - Ptr clone() const override - { - return std::static_pointer_cast(std::make_shared(*static_cast(this))); - } -}; +using ChunkInfoPtr = std::shared_ptr; /** * Chunk is a list of columns with the same length. 
@@ -52,26 +32,26 @@ public: class Chunk { public: - using ChunkInfoCollection = CollectionOfDerivedItems; - Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_infos(std::move(other.chunk_infos)) + , chunk_info(std::move(other.chunk_info)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_infos = std::move(other.chunk_infos); + chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -82,15 +62,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); + chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_infos.clear(); + chunk_info.reset(); } const Columns & getColumns() const { return columns; } @@ -101,9 +81,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - ChunkInfoCollection & getChunkInfos() { return chunk_infos; } - const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } - void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } + const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } + bool hasChunkInfo() const { return chunk_info != nullptr; } + void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -127,7 +107,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoCollection chunk_infos; + ChunkInfoPtr chunk_info; void checkNumRowsIsConsistent(); }; @@ -137,15 +117,11 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfoCloneable +class AsyncInsertInfo : public ChunkInfo { public: AsyncInsertInfo() = default; - AsyncInsertInfo(const AsyncInsertInfo & other) = default; - AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) - : offsets(offsets_) - , tokens(tokens_) - {} + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} std::vector offsets; std::vector tokens; @@ -154,11 +130,9 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfoCloneable +class ChunkMissingValues : public ChunkInfo { public: - ChunkMissingValues(const ChunkMissingValues & other) = default; - using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d9fab88fe1f..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 25c15d40c9a..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9e499e2c400..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count); - piece.back().setChunkInfos(concatenated.getChunkInfos()); - + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 46be6e74693..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,9 +8,8 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}) - , input(inputs.front()) - , output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..466adf93538 100644 --- 
a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - if (input.chunk.getChunkInfos().empty()) + const auto & info = input.chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - if (auto arenas_info = input.chunk.getChunkInfos().get()) + /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. + if (const auto * arenas_info = typeid_cast(info.get())) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().getChunkInfos().add(std::make_shared()); + chunks.back().setChunkInfo(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index e4f22deec8d..bcf4e759024 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,22 +6,18 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfoCloneable +class MergeTreePartLevelInfo : public ChunkInfo { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) - : origin_merge_tree_part_level(part_level) - { } - MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; - + explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto part_level_info = chunk.getChunkInfos().get(); - if (part_level_info) + const auto & info = chunk.getChunkInfo(); + if (const auto * part_level_info = typeid_cast(info.get())) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index cd347d371d9..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2f23f2a5c4d..a3ccccf0845 100644 --- 
a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace Poco { @@ -15,13 +14,11 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfoCloneable +struct ChunkSelectFinalIndices : public ChunkInfo { - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); - ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; - const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index b1b0182a113..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) + if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) + if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index c05cc1defcb..30cf958c072 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk); - cur_chunk = std::move(chunk); + consume(chunk.clone()); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,7 +18,8 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk & chunk) = 0; + virtual 
void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } private: std::vector table_locks; @@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk &) override {} + void consume(Chunk) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index 7ac460c14e2..ec0dc9609f1 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,10 +43,7 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - auto chunk = Chunk(res.getColumns(), res.rows()); - chunk.getChunkInfos().add(std::move(info)); - - return chunk; + return Chunk(res.getColumns(), res.rows(), std::move(info)); } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 1578bd389c9..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 9abe0504d10..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,9 +5,7 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) -{ -} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..9ffe15d0f85 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. 
- to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); + to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - { finalizeChunk(chunk, aggregates_mask); - } - else if (!chunk.getChunkInfos().get()) + else if (!chunk.getChunkInfo()) { - chunk.getChunkInfos().add(std::make_shared()); + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..5d50e97f552 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { @@ -13,12 +12,10 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable +struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} - Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 517f035667f..65f0612d738 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -44,11 +44,15 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info.get(); + return agg_info; } /// Reads chunks from file in native format. Provide chunks with aggregation info. 
@@ -206,7 +210,11 @@ private: void process(Chunk && chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); + + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..e167acde067 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfoCloneable +class AggregatedChunkInfo : public ChunkInfo { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 49a6581e685..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,12 +27,18 @@ public: } ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } protected: void onConsume(Chunk chunk) override { - cur_chunk = Squashing::squash(std::move(chunk)); + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -42,10 +48,16 @@ protected: res.is_done = true; return res; } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } private: Squashing squashing; Chunk cur_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 2c6b3bd8638..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,7 +1,6 @@ -#include -#include #include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp deleted file mode 100644 index 6786f76cbef..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include - -#include - -#include -#include -#include - - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void RestoreChunkInfosTransform::transform(Chunk & chunk) -{ - chunk.getChunkInfos().append(chunk_infos.clone()); -} - -namespace DeduplicationToken -{ - -String TokenInfo::getToken() const -{ - if (!isDefined()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); - - return getTokenImpl(); -} - -String TokenInfo::getTokenImpl() const -{ - String result; - result.reserve(getTotalSize()); - - for (const auto & part : parts) - { - if (!result.empty()) - result.append(":"); - result.append(part); - } - - return result; -} - -String TokenInfo::debugToken() const -{ - return getTokenImpl(); -} - -void TokenInfo::addChunkHash(String part) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if 
(stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(std::move(part)); -} - -void TokenInfo::finishChunkHashes() -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - stage = DEFINED; -} - -void TokenInfo::setUserToken(const String & token) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_USER_TOKEN; - - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("user-token-{}", token)); -} - -void TokenInfo::setSourceWithUserToken(size_t block_number) -{ - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("source-number-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::setViewID(const String & id) -{ - if (stage == DEFINED) - stage = DEFINE_VIEW; - - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-id-{}", id)); -} - -void TokenInfo::setViewBlockNumber(size_t block_number) -{ - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-block-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::reset() -{ - stage = UNDEFINED; - parts.clear(); -} - -void TokenInfo::addTokenPart(String part) -{ - parts.push_back(std::move(part)); -} - -size_t TokenInfo::getTotalSize() const -{ - if (parts.empty()) - return 0; - - size_t size = 0; - for (const auto & part : parts) - size += part.size(); - - // we reserve more size here to be able to add delimenter between parts. 
- return size + parts.size() - 1; -} - -#ifdef ABORT_ON_LOGICAL_ERROR -void CheckTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); -} -#endif - -String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) -{ - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); -} - - -void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); - - if (token_info->isDefined()) - return; - - token_info->addChunkHash(getChunkHash(chunk)); - token_info->finishChunkHashes(); -} - -void SetUserTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); - token_info->setUserToken(user_token); -} - -void SetSourceBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceWithUserToken(block_number++); -} - -void SetViewIDTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewIDTransform"); - token_info->setViewID(view_id); -} - -void SetViewBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); -} - -void ResetTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - - token_info->reset(); -} - -} -} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h deleted file mode 100644 index d6aff9e1370..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -#include -#include "Common/Logger.h" - - -namespace DB -{ - class RestoreChunkInfosTransform : public ISimpleTransform - { - public: - RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , chunk_infos(std::move(chunk_infos_)) - {} - - String getName() const override { return "RestoreChunkInfosTransform"; } - - void transform(Chunk & chunk) override; - - private: - Chunk::ChunkInfoCollection chunk_infos; - }; - - -namespace DeduplicationToken -{ - class TokenInfo : public ChunkInfoCloneable - { - public: - TokenInfo() = default; - TokenInfo(const TokenInfo & 
other) = default; - - String getToken() const; - String debugToken() const; - - bool empty() const { return parts.empty(); } - - bool isDefined() const { return stage == DEFINED; } - - void addChunkHash(String part); - void finishChunkHashes(); - - void setUserToken(const String & token); - void setSourceWithUserToken(size_t block_number); - - void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); - - void reset(); - - private: - String getTokenImpl() const; - - void addTokenPart(String part); - size_t getTotalSize() const; - - /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the following order. - * Firstly token is expanded with information about the source. - * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken - * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken - * - * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. - * - * transition // method - * DEFINED -> DEFINE_VIEW // setViewID - * DEFINE_VIEW -> DEFINED // defineViewID - */ - - enum BuildingStage - { - UNDEFINED, - DEFINE_SOURCE_WITH_HASHES, - DEFINE_SOURCE_USER_TOKEN, - DEFINE_VIEW, - DEFINED, - }; - - BuildingStage stage = UNDEFINED; - std::vector parts; - }; - - -#ifdef ABORT_ON_LOGICAL_ERROR - /// use that class only with debug builds in CI for introspection - class CheckTokenTransform : public ISimpleTransform - { - public: - CheckTokenTransform(String debug_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(std::move(debug_)) - { - } - - String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String debug; - LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - }; -#endif - - - class AddTokenInfoTransform : public ISimpleTransform - { - public: - explicit AddTokenInfoTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared()); - } - }; - - - class DefineSourceWithChunkHashTransform : public ISimpleTransform - { - public: - explicit DefineSourceWithChunkHashTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. 
- // But if there is some table with different engine, we still need to define the source of the data in deduplication token - // We use that transform to define the source as a hash of entire block in deduplication token - void transform(Chunk & chunk) override; - - static String getChunkHash(const Chunk & chunk); - }; - - class ResetTokenTransform : public ISimpleTransform - { - public: - explicit ResetTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } - - void transform(Chunk & chunk) override; - }; - - - class SetUserTokenTransform : public ISimpleTransform - { - public: - SetUserTokenTransform(String user_token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , user_token(std::move(user_token_)) - { - } - - String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String user_token; - }; - - - class SetSourceBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetSourceBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - - - class SetViewIDTransform : public ISimpleTransform - { - public: - SetViewIDTransform(String view_id_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , view_id(std::move(view_id_)) - { - } - - String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } - - void transform(Chunk & chunk) override; - - private: - String view_id; - }; - - - class SetViewBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetViewBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - -} -} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 04fabc9a3c6..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,7 +1,5 @@ #include #include - - namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - task = data.chunk.getChunkInfos().get(); - if (!task) + if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.getChunkInfos().add(std::make_shared()); + chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = 
std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.getChunkInfos().add(std::move(task)); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5f6d9d6fff2..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,7 +1,6 @@ #pragma once #include -#include -#include + namespace DB { @@ -112,12 +111,11 @@ private: }; -class DelayedBlocksTask : public ChunkInfoCloneable +class DelayedBlocksTask : public ChunkInfo { public: DelayedBlocksTask() = default; - DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 9ae80e21a68..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,7 +1,6 @@ #include #include - namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d7bc320173b..607087fb39c 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,7 +150,11 @@ private: if (!chunk.hasRows()) return; - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index ea9ebb0f96e..fc40c6894bb 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_shared(std::move(chunks)); + info->chunks = std::make_unique(std::move(chunks)); Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); output.push(std::move(chunk)); } @@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if 
(chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - - auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); - chunk.getChunkInfos().add(std::move(chunk_info)); - - chunks_map[bucket].emplace_back(std::move(chunk)); + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } @@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); + if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - if (cur_chunk.getChunkInfos().empty()) + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (auto agg_info = cur_chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (cur_chunk.getChunkInfos().get()) + else if (typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.getChunkInfos().add(std::move(res_info)); + chunk.setChunkInfo(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 3a3c1bd9c1e..77ee3034ffc 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -143,9 +142,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfoCloneable +struct ChunksToMerge : public ChunkInfo { - 
std::shared_ptr chunks; + std::unique_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..ad723da7527 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index ee4dfa6a64e..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header_, header_) - , squashing(header_, min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk) + if (!squashed_chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return bool(squashed_chunk); + return squashed_chunk.hasChunkInfo(); } Chunk PlanSquashingTransform::getRemaining() { - return squashing.flush(); + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index e6db245499e..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: 
PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,6 +23,7 @@ protected: private: Squashing squashing; Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index b44f5a3203e..480ab1a0f61 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - auto select_final_indices_info = chunk.getChunkInfos().extract(); + const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,6 +41,7 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } + chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1fb4433240a..34b733cde5e 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,7 +18,9 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +33,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = Squashing::squash(squashing.flush()); + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -44,7 +49,6 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); - if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -63,14 +67,18 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(planned_chunk)); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = Squashing::squash(squashing.flush()); + chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); } } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 59fceccb538..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. 
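    /// Note on the SquashingTransform / PlanSquashingTransform hunks above: this patch reverts to
    /// the two-step squashing flow, where Squashing::add() returns a "planned" chunk that carries
    /// only a ChunkInfo describing what to merge, and DB::Squashing::squash() materializes it into
    /// real columns. A rough sketch of the pattern as it is used in the hunks above (an
    /// illustration, not a new API):
    ///
    ///     Chunk planned_chunk = squashing.add(std::move(chunk));
    ///     if (planned_chunk.hasChunkInfo())                      /// enough rows/bytes accumulated
    ///         cur_chunk = DB::Squashing::squash(std::move(planned_chunk));
    ///
    ///     Chunk rest = squashing.flush();                        /// whatever is still buffered
    ///     if (rest.hasChunkInfo())
    ///         finish_chunk = DB::Squashing::squash(std::move(rest));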
if (overflow_row) { - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +24,9 @@ #include #include #include -#include "base/defines.h" -#include #include #include -#include namespace ProfileEvents @@ -111,7 +105,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -122,7 +116,6 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; - bool disable_deduplication_for_children; struct State { @@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageLiveView & live_view; @@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageWindowView & window_view; @@ -223,10 +216,45 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + * + * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables + * are involved. 
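              * For instance (hypothetical values): if the INSERT into `landing` runs with
              * insert_deduplication_token = 'abc', the current token gets '_<uuid of mv_2_1>'
              * appended on the path through mv_2_1 (or '_db.mv_2_1' when the view has no UUID)
              * and '_<uuid of mv_2_2>' on the path through mv_2_2, so the two blocks inserted
              * into ds_2_1 carry different tokens and are no longer deduplicated against each
              * other.
              *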
+ * + * Example: + * + * landing -┬--> mv_1_1 --┬-> ds_1_1 + * | | + * └--> mv_1_2 --┘ + * + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (view_id.hasUUID()) + insert_deduplication_token += "_" + toString(view_id.uuid); + else + insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -333,13 +361,7 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter( - nullptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -356,10 +378,6 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", out.getInputHeader())); -#endif - auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -402,19 +420,11 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); -#endif - auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); + storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); -#endif } return out; @@ -455,7 +465,11 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; + /// If the "root" table deduplicates blocks, there are no need to make deduplication for children + /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks + bool disable_deduplication_for_children = false; + if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -546,25 +560,12 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - 
result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - else if (dynamic_cast(storage.get())) - { - auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); - metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); - sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -572,15 +573,8 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - - result_chain.addSource(std::make_shared(sink->getHeader())); - result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(storage_header)); - } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -596,7 +590,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & context = view.context; @@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - - if (!disable_deduplication_for_children) - { - String materialize_view_id = view.table_id.hasUUID() ? 
toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - else - { - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_, - bool disable_deduplication_for_children_) + std::shared_ptr views_data_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) - , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + state.emplace(process(block, view, *views_data)); } @@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk & chunk) +void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk & chunk) +void PushingToWindowViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index a9e5b1535c0..f0b2ead687e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index bb2be2c8ffb..2cd4dc42a83 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept 
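/// Note: despite the plain assignment now used in QueryPipelineBuilder::addResources() above,
/// the move assignment defined below keeps appending semantics: it inserts the rhs table locks,
/// storage holders and interpreter contexts into the existing vectors instead of replacing them,
/// so resources still accumulate across calls.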
+QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde return *this; } -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept -{ - append(std::move(rhs)); - return *this; -} - QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index 10f7f39ab09..ed9eb68b7ba 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,11 +20,8 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; - /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; - QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fccea9e258e..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); - if (result_chunk) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = Squashing::squash(squashing.flush()); - if (!result_chunk) - { - return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); - } + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 8791668cd89..e556bda2561 100644 --- 
a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk & chunk) +void DistributedSink::consume(Chunk chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk & chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp( - copy_query_ast, - job.local_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp( - query_ast, - context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 5b7396f2c6f..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f9bd8b6ff9..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter( - insert, - new_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); - + InterpreterInsertQuery interpreter(insert, new_context, false, true, true); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - kafka_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h 
b/src/Storages/LiveView/LiveViewSink.h index 9803fa0a160..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 82759e8a851..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include @@ -331,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch builder = interpreter.buildQueryPipeline(); } - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(chunk_infos.clone(), cur_header); - }); - - String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(live_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 12d8e898347..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + void writeBlock(const Block & block, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index ff5214a5e51..bc8cb0ce69a 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,13 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); - return ChunkAndProgress{ - .chunk = std::move(chunk), + .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? 
std::make_shared(task->getInfo().data_part->info.level) : nullptr), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 4f90f7131da..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,10 +264,7 @@ try ++it; } - auto result = Chunk(std::move(res_columns), rows_read); - if (add_part_level) - result.getChunkInfos().add(std::make_shared(data_part->info.level)); - return result; + return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d8cfce1ca99..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,27 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include +#include +#include +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { @@ -71,12 +58,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk & chunk) +void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk) size_t streams = 0; bool support_parallel_write = false; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - String block_dedup_token; - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } - if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + } + size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk) /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); - if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = block_dedup_token, + .block_dedup_token = std::move(block_dedup_token), .part_counters = std::move(part_counters), }); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - const Settings & settings = context->getSettingsRef(); - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - - if (settings.insert_deduplicate && deduplication_log) + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 90976020d52..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,6 +36,7 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. 
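With the TokenInfo-based machinery removed above, MergeTreeSink (and ReplicatedMergeTreeSink further down) go back to deriving per-block deduplication ids directly from the insert_deduplication_token setting. A rough illustration of the restored scheme, assuming a user-supplied token 'abc' and an INSERT that is split into three blocks:

    // chunk_dedup_seqnum is the ordinal of the block within the current insert query
    block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum);   // "abc_0", "abc_1", "abc_2"
    ++chunk_dedup_seqnum;

    // later, when the delayed part is committed, the block id also mixes in the partition
    // identifier and the data hash
    const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token);

Re-running the same INSERT with the same token therefore reproduces the same block ids, and the deduplication log (or the ZooKeeper block ids, for replicated tables) discards the duplicates.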
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3dbcb5e5bda..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); - if (squashed_chunk) + auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bbae054fbed..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,25 +1,21 @@ +#include +#include +#include +#include +#include #include "Common/Exception.h" #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include - +#include +#include +#include +#include #include -#include - namespace ProfileEvents { @@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = 
getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if constexpr (async_insert) { - const auto async_insert_info_ptr = chunk.getChunkInfos().get(); - if (async_insert_info_ptr) + const auto & chunk_info = chunk.getChunkInfo(); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - String block_dedup_token; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + if (deduplicate) { + String block_dedup_token; + /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. + + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } } profile_events_scope.reset(); @@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + /// If deduplicated data should not be inserted into MV, we need to set proper + /// value for `last_block_is_duplicate`, which is possible only after the part is committed. + /// Othervide we can delay commit. + /// TODO: we can also delay commit if there is no MVs. 
+ if (!settings.deduplicate_blocks_in_dependent_materialized_views) + finishDelayedChunk(zookeeper); + ++num_blocks_processed; } @@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; + last_block_is_duplicate = false; + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; + last_block_is_duplicate = last_block_is_duplicate || deduplicated; + /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir] () + auto try_rollback_part_rename = [this, &part, &original_part_dir]() { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - const auto & settings = context->getSettingsRef(); - - ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( - settings.insert_keeper_fault_injection_probability, - settings.insert_keeper_fault_injection_seed, - storage.getZooKeeper(), - "ReplicatedMergeTreeSink::onFinish", - log); - - finishDelayedChunk(zookeeper); + auto zookeeper = storage.getZooKeeper(); + finishDelayedChunk(std::make_shared(zookeeper)); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..39623c20584 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,6 +59,16 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + /// For proper deduplication in MaterializedViews + bool lastBlockIsDuplicate() const override + { + /// If MV is responsible for deduplication, block is not considered duplicating. 
+ if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; + } + struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -116,6 +126,7 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; + bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk & chunk) +void MessageQueueSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index 4a9248c6c4d..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,13 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - nats_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d2bdd0af302..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk & chunk) +void StorageObjectStorageSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6ab531bb21a..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..4388864434e 100644 --- 
a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter( - insert, - queue_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index ee2570756ed..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk & chunk) +void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(partition_index_to_chunk[partition_index]); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index fcd67556dc9..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 44479bd01e2..ba3cc6f58d0 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ true, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context, true); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); 
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..e4b19992151 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - rabbitmq_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 4b5188ca9f2..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,10 +247,7 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. if (!chunks.empty()) - { - Chunk empty; - consume(empty); - } + consume({}); } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 64190c8c86f..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1f7f6939f40..c451cfd1bf5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk & chunk) +void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 2e1e0c7b429..011322df829 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3473166a080..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,8 +313,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - auto chunk = Chunk(block.getColumns(), block.rows()); - 
sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b064fba223a..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,13 +1020,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 67586985ce8..849fa5dbe0b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,13 +1050,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter( - new_query, - query_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(new_query, query_context); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3fb397c7b81..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1778,12 +1778,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index c80e799a92b..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,10 +1248,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); - } + sink->consume(Chunk{block.getColumns(), block.rows()}); sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 463694c63aa..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,6 +21,7 @@ #include #include +#include "StorageLogSettings.h" #include 
#include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk & chunk) +void LogSink::consume(Chunk chunk) { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b1bd7053c2e..f69c4adb552 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index e0818fafae9..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -106,12 +107,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 2a8a7bd2ee7..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index cdfeab62b58..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 1a275320f43..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk & chunk) +void RedisSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = 
getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,8 +567,7 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - Chunk chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0d094c15880..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk & chunk) +void SetOrJoinSink::consume(Chunk chunk) { - Block block = getHeader().cloneWithColumns(chunk.getColumns()); + Block block = getHeader().cloneWithColumns(chunk.detachColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 9b6d9f041e1..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -226,9 +226,9 @@ public: } } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 90e05c44e31..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk & chunk) +void StorageURLSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c9c606de049..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) 
: SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e36247103c7..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,13 +689,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter( - insert, - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, getContext()); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1420,7 +1413,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) + StorageWindowView & window_view, const Block & block, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1435,7 +1428,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1480,10 +1473,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header_) + pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header_, std::make_shared(filter_expression), + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1538,30 +1531,6 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); - - builder.addSimpleTransform([&](const Block & stream_header) - { - // Can't move chunk_infos here, that function could be called several times - return std::make_shared(chunk_infos.clone(), stream_header); - }); - - String window_view_id = window_view.getStorageID().hasUUID() ? 
toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(window_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); - }); -#endif - builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1601,13 +1570,6 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); - }); -#endif - auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1624,16 +1586,9 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); } -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Before out", stream_header); - }); -#endif - builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..f79867df424 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 14c11bc8500..87b2c45bbc5 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,8 +29,6 @@ def get_counts(): def test_basic(start_cluster): - old_src, old_a, old_b, old_c = 0, 0, 0, 0 - node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -41,15 +39,6 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) - - src, a, b, c = get_counts() - assert src == old_src + 1 - assert a == old_a + 2 - assert b == old_b + 2 - assert c == old_c + 2 - old_src, old_a, old_b, old_c = src, a, b, c - - # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -57,51 +46,34 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - 
assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication only for src table + old_src, old_a, old_b, old_c = get_counts() + # number of rows in test_mv_a and test_mv_c depends on order of inserts into views + assert old_src == 11 + assert old_a in (1, 11) + assert old_b == 1 + assert old_c in (1, 11) + node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for all the tables - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() + # no changes because of deduplication in source table assert src == old_src assert a == old_a assert b == old_b assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c - # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == 11 + assert a == old_a + 10 # first insert could be succesfull with disabled dedup + assert b == 11 + assert c == old_c + 10 + with pytest.raises(QueryRuntimeException): node.query( """ @@ -110,23 +82,16 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication for all tables, except test_mv_b. 
For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() - assert src == old_src - assert a == old_a - assert b == old_b + 10 - assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c + assert src == 21 + assert a == old_a + 20 + assert b == 21 + assert c == old_c + 20 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 9c9281dc7e4..adf6abb7298 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -3 +2 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 51e6a513608..d3c4da86b41 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default +-- Implicit insert isn't deduplicated SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 8f7d19028b0..1fb219108da 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index dadf2f35e6e..a9801e3b910 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select 
arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index 6287156daaf..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,7 +16,6 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS -SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index 2d9f236ada9..e0cc8f0ce63 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data -18 18 18 18 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent +18 9 9 9 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 465c8d6136c..fdd75b91b1f 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 
'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index ba50902ebea..d2d19b76a1f 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 07deb7c2565..335b55f05c8 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,14 +10,13 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 -2023-09-01 12:00:00 42 +2022-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 +2022-09-01 12:00:00 42 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index a2378fd8f67..f206f0d7775 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,9 +54,8 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + - 2nd 
insert is not inserting anything in mv table due to a bug computing blocks to be discarded - Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -98,7 +97,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables + - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python deleted file mode 100644 index dd1058518c9..00000000000 --- a/tests/queries/0_stateless/03008_deduplication.python +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import argparse -import string - - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - - -def __format(template, **params): - field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] - kv_args = {} - for field in field_names: - if field in params: - kv_args[field] = params[field] - else: - kv_args[field] = "" - - return template.format(**kv_args) - - -def instance_create_statement( - table_name, - table_columns, - table_keys, - table_engine, - with_deduplication, - no_merges=True, -): - template = """ - CREATE TABLE {table_name} - {table_columns} - ENGINE = {table_engine} - ORDER BY {table_keys} - {table_settings}; - {table_no_merges} - """ - - params = dict() - params["table_name"] = table_name - params["table_columns"] = table_columns - params["table_keys"] = table_keys - params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = ( - "MergeTree()" - if table_engine == "MergeTree" - else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" - ) - - deduplication_window_setting_name = ( - "non_replicated_deduplication_window" - if table_engine == "MergeTree" - else "replicated_deduplication_window" - ) - deduplication_window_setting_value = 1000 if with_deduplication else 0 - - settings = list() - settings += [ - f"{deduplication_window_setting_name}={deduplication_window_setting_value}" - ] - params["table_settings"] = "SETTINGS " + ",".join(settings) - - return __format(template, **params) - - -def instance_insert_statement( - table_name, count, insert_method, insert_unique_blocks, use_insert_token -): - insert_settings = ( - "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" - ) - - if insert_method == "InsertSelect": - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", - insert_settings=insert_settings, - ) - - else: - template = """ - INSERT INTO {table_name} - {insert_settings} VALUES {insert_values}; - """ - - values = [] - for i in range(count): - values += ( - [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] - ) - insert_values = ", ".join(values) - - return __format( - template, - table_name=table_name, - 
insert_settings=insert_settings, - insert_values=insert_values, - ) - - -def get_drop_tables_statements(tables): - return "".join( - [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] - ) - - -def get_logs_statement(args): - if args.get_logs: - return "SET send_logs_level='test';" - return "" - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -class ArgsFactory: - def __init__(self, parser): - self.__parser = parser - - def add_opt_engine(self): - self.__parser.add_argument( - "--table-engine", - choices=["ReplicatedMergeTree", "MergeTree"], - default="MergeTree", - ) - - def add_opt_user_token(self): - self.__parser.add_argument( - "--use-insert-token", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_single_thread(self): - self.__parser.add_argument( - "--single-thread", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_dedup_src(self): - self.__parser.add_argument( - "--deduplicate-src-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_dedup_dst(self): - self.__parser.add_argument( - "--deduplicate-dst-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_get_logs(self): - self.__parser.add_argument( - "--get-logs", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_uniq_blocks(self): - self.__parser.add_argument( - "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_insert_method(self): - self.__parser.add_argument( - "--insert-method", - choices=["InsertSelect", "InsertValues"], - default="InsertSelect", - ) - - def add_all(self): - self.add_opt_engine() - self.add_opt_user_token() - self.add_opt_single_thread() - self.add_opt_dedup_src() - self.add_opt_dedup_dst() - self.add_opt_get_logs() - self.add_opt_insert_method() - self.add_opt_uniq_blocks() - - -def test_insert_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_statement = instance_create_statement( - table_name="table_when_b_even", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - create_mv_statement = """ - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - """ - - drop_tables_statements = get_drop_tables_statements( - ["table_a_b", "table_when_b_even", "mv_b_even"] - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 10, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - print_details_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - - SELECT 'table_when_b_even'; - SELECT 'count', count() FROM table_when_b_even; - {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - 
assert_first_insert_statements = f""" - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 5 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = """ - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 10 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_a_b_statement} - - {create_table_when_b_even_statement} - - {create_mv_statement} - - -- first insert - {insert_statement} - - {print_details_statements} - - {assert_first_insert_statements} - - -- second insert, it is retry - {insert_statement} - - {print_details_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_mv_generates_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = [ - "table_for_join_with", - "table_a_b", - "table_when_b_even_and_joined", - "mv_b_even", - ] - drop_tables_statements = get_drop_tables_statements(tables) - - details_print_for_table_for_join_with = "" - if args.get_logs: - details_print_for_table_for_join_with = """ - SELECT 'table_for_join_with'; - SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - """ - - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a_src String, b UInt64)", - table_keys="(a_src, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_and_joined_statement = instance_create_statement( - table_name="table_when_b_even_and_joined", - table_columns="(a_src String, a_join String, b UInt64)", - table_keys="(a_src, a_join, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 5, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - - SELECT 'table_when_b_even_and_joined'; - SELECT 'count', count() FROM table_when_b_even_and_joined; - {"" if not args.get_logs else "SELECT _part, 
a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 5 ) - FROM table_a_b; - - SELECT throwIf( count() != 9 ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) - FROM table_when_b_even_and_joined; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - CREATE TABLE table_for_join_with - (a_join String, b UInt64) - ENGINE = MergeTree() - ORDER BY (a_join, b); - INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(1); - {details_print_for_table_for_join_with} - - {create_table_a_b_statement} - SYSTEM STOP MERGES table_a_b; - - {create_table_when_b_even_and_joined_statement} - SYSTEM STOP MERGES table_when_b_even_and_joined; - - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even_and_joined - AS - SELECT a_src, a_join, table_for_join_with.b as b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -- first insert - {insert_statement} - - {details_print_statements} - - -- first assertion - {assert_first_insert_statements} - - -- second insert - {insert_statement} - - {details_print_statements} - - -- second assertion - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_several_mv_into_one_table(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] - drop_tables_statements = get_drop_tables_statements(tables) - - create_table_src_statement = instance_create_statement( - table_name="table_src", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_dst_statement = instance_create_statement( - table_name="table_dst", - 
table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_src", - 8, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_src count', count() FROM table_src; - - SELECT 'table_dst count', count() FROM table_dst; - {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 8 ) - FROM table_src; - - SELECT throwIf( count() != 6 ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) - FROM table_dst; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_src_statement} - - {create_table_dst_statement} - - CREATE MATERIALIZED VIEW mv_b_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 2 = 0; - - CREATE MATERIALIZED VIEW mv_b_even_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 4 = 0; - - -- first insert - {insert_statement} - - {details_print_statements} - - {assert_first_insert_statements} - - -- second insert, retry - {insert_statement} - - {details_print_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def parse_args(): - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(dest="test") - test_insert_several_blocks( - subparsers.add_parser("insert_several_blocks_into_table") - ) - test_mv_generates_several_blocks( - subparsers.add_parser("mv_generates_several_blocks") - ) - test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) - args = parser.parse_args() - if args.test is None: - parser.print_help() - return args - - -def main(): - args = parse_args() - if args.test is not None: - args.func(args) - - -if __name__ == "__main__": - main() diff --git 
a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference deleted file mode 100644 index 4893274c1cd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference +++ /dev/null @@ -1,41 +0,0 @@ -Different materialized view insert into one underlayed table equal data. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -second attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Different insert operations generate the same data after transformation in underlied table of materialized view. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -second attempt -from dst 1 A all_1_1_0 -from dst 2 A all_2_2_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Indentical blocks in insertion with `insert_deduplication_token` -first attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -second attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -third attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -Indentical blocks in insertion -from dst 0 A all_1_1_0 -Indentical blocks after materialised view`s transformation -first attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 -second attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql deleted file mode 100644 index 7927a6b1edf..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql +++ /dev/null @@ -1,331 +0,0 @@ --- ######### -select 'Different materialized view insert into one underlayed table equal data.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; -DROP TABLE IF EXISTS mv_first; -DROP TABLE IF EXISTS mv_second; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE TABLE mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_first -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -CREATE MATERIALIZED VIEW mv_second -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_second; -DROP TABLE mv_first; -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS 
SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (2, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion with `insert_deduplication_token`'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -select 'first attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'third attempt'; - -INSERT INTO dst SELECT - 1 AS key, - 'b' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks after materialised view`s transformation'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference deleted file mode 100644 index c82a6eaa213..00000000000 --- 
a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ /dev/null @@ -1,35 +0,0 @@ -no user deduplication token -partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: -1 A -1 D -2 B -2 C -mv_table is not deduplicated because the inserted blocks was different: -1 A -1 A -1 D -2 B -2 B -2 C -with user deduplication token -partitioned_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -mv_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -with incorrect ussage of user deduplication token -partitioned_table is deduplicated because equal tokens: -1 A -2 B -mv_table is deduplicated because equal tokens: -1 A -2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql deleted file mode 100644 index 2eb931f7f73..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ /dev/null @@ -1,83 +0,0 @@ -DROP TABLE IF EXISTS partitioned_table; -DROP TABLE IF EXISTS mv_table; - - -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is not deduplicated because different tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because different tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with incorrect ussage of user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated because equal tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is deduplicated because equal tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference deleted file mode 100644 index bf900aa84d2..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect 
engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - 
-Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 
-table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even 
-count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh deleted file mode 100755 index 49eb52b47fd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference deleted file mode 100644 index c815324b455..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - 
-Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 
-0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even 
-count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh deleted file mode 100755 index 53af06d4a6f..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference deleted file mode 100644 index 6e76ec46aa8..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False 
-table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 
10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh deleted file mode 100755 index 7d4f5240cd1..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference deleted file mode 100644 index a25e8713c61..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined 
-count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 
-table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh deleted file mode 100755 index 109d1674f3a..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference deleted file mode 100644 index b6a3e0175a7..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: 
insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src 
count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh deleted file mode 100755 index fe3d610a758..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference deleted file mode 100644 index 1921103f49e..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 
-table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 
8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh deleted file mode 100755 index 9adee6d53d4..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index cedb651a430..1e6bfb414d8 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o StorageFileSink | wc -l +" | grep -o MaterializingTransform | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 7180ae03467b05fb0495d744c066d4df758c37a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 5 Jul 2024 13:18:48 +0000 Subject: [PATCH 405/439] Add `use_same_s3_credentials_for_base_backup` to docs --- docs/en/operations/backup.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2ba50b39934..7c102c38fa6 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`. 
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` From d3f23c2753ff2b1ac935268c1af0609616381782 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 5 Jul 2024 13:29:34 +0000 Subject: [PATCH 406/439] Bump s2geometry again --- contrib/s2geometry | 2 +- contrib/s2geometry-cmake/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/contrib/s2geometry b/contrib/s2geometry index 0146e2d1355..6522a40338d 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57 +Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 5eabe71b538..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,6 @@ option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) -# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged -if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X) +if (NOT ENABLE_S2_GEOMETRY) message(STATUS "Not using S2 Geometry") return() endif() From 40fd1502794a199d576ae8c57c5e1d08238b00f7 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Jul 2024 15:30:32 +0200 Subject: [PATCH 407/439] Switch submodule contrib/orc to proper commit in the main branch. --- contrib/orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/orc b/contrib/orc index 947cebaf943..bcc025c0982 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f +Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f From 3a79b9dc8f672e3993057c989618bd7f17d622de Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 5 Jul 2024 15:35:00 +0200 Subject: [PATCH 408/439] start and end variants also accept tuple of parameters --- .../functions/time-window-functions.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index bad545fc5a5..5169d4487ec 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -50,7 +50,6 @@ Returns the inclusive lower bound of the corresponding [tumbling window](#tumble **Syntax** ``` sql -tumbleStart(bounds_tuple); tumbleStart(time_attr, interval [, timezone]); ``` @@ -60,6 +59,8 @@ tumbleStart(time_attr, interval [, timezone]); - `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + **Returned values** - The inclusive lower bound of the corresponding tumbling window. 
[DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). @@ -87,7 +88,6 @@ Returns the exclusive upper bound of the corresponding [tumbling window](#tumble **Syntax** ``` sql -tumbleEnd(bounds_tuple); tumbleEnd(time_attr, interval [, timezone]); ``` @@ -97,6 +97,8 @@ tumbleEnd(time_attr, interval [, timezone]); - `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + **Returned values** - The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). @@ -163,7 +165,6 @@ Returns the inclusive lower bound of the corresponding [hopping window](#hop). **Syntax** ``` sql -hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` **Arguments** @@ -173,6 +174,8 @@ hopStart(time_attr, hop_interval, window_interval [, timezone]); - `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + **Returned values** - The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). @@ -204,7 +207,6 @@ Returns the exclusive upper bound of the corresponding [hopping window](#hop). **Syntax** ``` sql -hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); ``` **Arguments** @@ -214,6 +216,8 @@ hopEnd(time_attr, hop_interval, window_interval [, timezone]); - `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + **Returned values** - The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). @@ -236,6 +240,7 @@ Result: ┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ │ 2024-07-05 00:00:00 │ └───────────────────────────────────────────────────────┘ + ``` ## Related content From 75d3ffd8df5d0910ad02bab6260d6d637c647391 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Jul 2024 15:13:11 +0100 Subject: [PATCH 409/439] Fix style --- src/Client/HedgedConnections.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 9211f39f0ae..4effc3adafa 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -200,7 +200,7 @@ void HedgedConnections::sendQuery( /// In other words, the initiator always controls whether the analyzer enabled or not for /// all servers involved in the distributed query processing. 
modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); - + replica.connection->sendQuery( timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); From 964047bf8c676c520d28d30da7ed237af76147df Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 5 Jul 2024 16:31:00 +0200 Subject: [PATCH 410/439] Yarrrr --- cmake/target.cmake | 2 +- docker/test/fasttest/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index d6c497955f6..3d0ecd032f9 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") + message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}") endif () diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index cdbfc3f0beb..2512268be0f 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -39,7 +39,7 @@ RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s # LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path. # It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792 -RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || mv /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu +RUN test ! 
-d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From 1b1922a2e022211a150157b5d0861f1547d69de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 5 Jul 2024 17:44:44 +0200 Subject: [PATCH 411/439] Fix issue in SumIfToCountIfVisitor and signed integers --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 2 +- ...201_sumIf_to_countIf_return_type.reference | 24 +++++++++++++++++++ .../03201_sumIf_to_countIf_return_type.sql | 2 ++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference create mode 100644 tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index f52d724f346..e5ee8a0d0b2 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -33,7 +33,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction()) + if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64())) return; auto function_name = function_node->getFunctionName(); diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference new file mode 100644 index 00000000000..62f5eb45106 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference @@ -0,0 +1,24 @@ +QUERY id: 0 + PROJECTION COLUMNS + (sumIf(toInt64(1), 1)) Tuple(Int64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Int64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Int64_1, constant_value_type: Int64 + EXPRESSION + FUNCTION id: 7, function_name: toInt64, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS optimize_rewrite_sum_if_to_count_if=1 diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql new file mode 100644 index 00000000000..24369fd6497 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql @@ -0,0 +1,2 @@ +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT tuple(sumIf(toInt64(1), 1)) FROM numbers(100) settings optimize_rewrite_sum_if_to_count_if=1; From 5697efa191bc0e50a9a732f59c905523341c7a1c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 5 Jul 2024 17:09:17 +0000 Subject: [PATCH 412/439] fix for const arrays --- src/Functions/array/arrayIndex.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 111ab92b006..0782f109187 100644 --- 
a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -712,6 +715,7 @@ private: auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); UInt64 index = 0; + UInt64 left_size = arguments[0].column->size(); ResultColumnPtr col_result = ResultColumnType::create(); if (!right->isNullAt(0)) @@ -732,6 +736,10 @@ private: else { col_result->getData().resize_fill(col_array->size()); + + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + return col_result; } } @@ -745,7 +753,7 @@ private: nullptr); if (col_array_const) - return ColumnConst::create(std::move(col_result), arguments[0].column->size()); + return ColumnConst::create(std::move(col_result), left_size); return col_result; } From c327f49f0cba7f3c1a8dab00bb355b0c6a921650 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 5 Jul 2024 20:04:43 +0200 Subject: [PATCH 413/439] add fullHostName which is an alias --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 58fc1eba02e..95ef5e7bd21 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -This function is case-insensitive. +Aliases: `fullHostName`, 'FQDN'. **Returned value** From a502933f5ea33f9f7ee07944afbd926d21603731 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Jul 2024 20:09:08 +0200 Subject: [PATCH 414/439] Fix clang-tidy error in BufferWithOwnMemory.h --- src/IO/BufferWithOwnMemory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 0ec733f7840..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - [[maybe_unused]] bool allow_gwp_asan_force_sample; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; Memory() = default; From 937ce8f780bb8d5ebb6ba115ee2d0c681a053299 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 5 Jul 2024 21:50:56 +0200 Subject: [PATCH 415/439] add aggThrow function to docs --- .../aggregate-functions/reference/aggthrow.md | 37 +++++++++++++++++++ .../aggregate-functions/reference/index.md | 1 + 2 files changed, 38 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/aggthrow.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md new file mode 100644 index 00000000000..fdbfd5b9e41 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md @@ -0,0 +1,37 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/aggthrow +sidebar_position: 101 +--- + +# aggThrow + +This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability. + +**Syntax** + +```sql +aggThrow(throw_prob) +``` + +**Arguments** + +- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md). 
+ +**Returned value** + +- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`. + +**Example** + +Query: + +```sql +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; +``` + +Result: + +```response +Received exception: +Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW) +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..b0e5582bd87 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -43,6 +43,7 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [aggThrow](../reference/aggthrow.md) - [analysisOfVariance](../reference/analysis_of_variance.md) - [any](../reference/any_respect_nulls.md) - [anyHeavy](../reference/anyheavy.md) From f94076ce8f7ea8c6eeec3637d72e67b6dd6fb1d1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 5 Jul 2024 22:20:46 +0200 Subject: [PATCH 416/439] Fix clang tidy --- src/IO/BufferWithOwnMemory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 0ec733f7840..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - [[maybe_unused]] bool allow_gwp_asan_force_sample; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; Memory() = default; From d4a4e4ec2dd8a8a141a705ea6b90506bb486b7e8 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 5 Jul 2024 22:42:22 +0000 Subject: [PATCH 417/439] Fix crash when adding empty tuple to query cache --- src/Columns/ColumnTuple.cpp | 8 +++++++- .../0_stateless/03201_query_cache_empty_tuple.reference | 2 ++ .../queries/0_stateless/03201_query_cache_empty_tuple.sql | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03201_query_cache_empty_tuple.reference create mode 100644 tests/queries/0_stateless/03201_query_cache_empty_tuple.sql diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..f262a8676b7 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c ColumnPtr ColumnTuple::compress() const { if (columns.empty()) - return Ptr(); + { + return ColumnCompressed::create(size(), 0, + [n = column_length] + { + return ColumnTuple::create(n); + }); + } size_t byte_size = 0; Columns compressed; diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference b/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference new file mode 100644 index 00000000000..50e44edaecb --- /dev/null +++ b/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference @@ -0,0 +1,2 @@ +() 0 +() 0 diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql b/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql new file mode 100644 index 00000000000..8e133143ef8 --- /dev/null +++ b/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql @@ -0,0 +1,2 @@ +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; 
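The patch above makes `ColumnTuple::compress()` return a real compressed column even when the tuple has no element columns, so the query cache no longer ends up with a null pointer when a result block contains `tuple()`. A minimal standalone sketch of the same idea follows; it is an editor's illustration, not ClickHouse code, and the names `EmptyTupleColumn`, `CompressedColumn` and `compressEmptyTuple` are invented for the example. The point it shows: a zero-column tuple still carries one piece of state, its row count, and the decompression callback must be able to rebuild a column of exactly that length.

```cpp
// Standalone sketch (not ClickHouse code): why compress() must still return a
// valid object for a tuple column with zero element columns. The only state
// such a column carries is its row count, so the "decompressed" form can be
// rebuilt from that single number.
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>

struct EmptyTupleColumn
{
    size_t rows = 0;                 // tuple() has no element columns, only a length
    size_t size() const { return rows; }
};

struct CompressedColumn
{
    size_t rows_hint = 0;
    size_t compressed_bytes = 0;
    std::function<EmptyTupleColumn()> decompress;  // callback that rebuilds the column
};

// Analogue of the fixed compress() path: never hand back an empty pointer,
// always return something the cache can later decompress.
std::shared_ptr<CompressedColumn> compressEmptyTuple(const EmptyTupleColumn & col)
{
    auto result = std::make_shared<CompressedColumn>();
    result->rows_hint = col.size();
    result->compressed_bytes = 0;                              // nothing to store besides the row count
    result->decompress = [n = col.size()] { return EmptyTupleColumn{n}; };
    return result;
}

int main()
{
    EmptyTupleColumn col{1};                      // one row of tuple()
    auto compressed = compressEmptyTuple(col);    // previously this path yielded a null handle
    std::cout << compressed->decompress().size() << '\n';  // prints 1
}
```

The two identical queries in the new test presumably exercise both sides of the cache: the first execution compresses and stores the `tuple()` result, the second one reads it back, which is why the reference file repeats `() 0` twice.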
From ffd6bf28b1f844c360cd054e55dcedb325854d53 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 6 Jul 2024 07:42:15 +0200 Subject: [PATCH 418/439] tests: fix 01563_distributed_query_finish flakiness (due to system.*_log_sender) From CI logs [1], during this test was executing on server: 2024.07.05 19:29:45.856853 [ 1328 ] {} system.zookeeper_log_sender.DistributedInsertQueue.default: Code: 210. DB::NetException: Connection reset by peer, while writing to socket (172.17.0.2:38546 -> 3.16.142.177:9440): While sending /var/lib/clickhouse/store/aa8/aa8f6e66-486b-4dc3-85a1-4941e69cb99f/shard1_replica1/447.bin. (NETWORK_ERROR), Stack trace (when copying this message, always include the lines below): [1]: https://s3.amazonaws.com/clickhouse-test-reports/66162/daae5d4d4661c780b6368950ec484415ca3a0492/stateless_tests__aarch64_.html So let's add retries Signed-off-by: Azat Khuzhin --- .../01563_distributed_query_finish.reference | 1 - .../01563_distributed_query_finish.sh | 29 +++++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference index c3688b553c4..b48979a492e 100644 --- a/tests/queries/0_stateless/01563_distributed_query_finish.reference +++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference @@ -1,2 +1 @@ -1,0 NETWORK_ERROR=0 diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index 0019c714e40..e3c5928f108 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards select * from dist_01247 format Null; EOL -network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") +# NOTE: it is possible to got NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender to the cloud +for ((i = 0; i < 100; ++i)); do + network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") -opts=( - "--max_distributed_connections=1" - "--optimize_skip_unused_shards=1" - "--optimize_distributed_group_by_sharding_key=1" - "--prefer_localhost_replica=0" -) -$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < Date: Sat, 6 Jul 2024 12:40:58 +0200 Subject: [PATCH 419/439] add mapPartialSort and mapPartialReverseSor functions t documentation --- .../functions/tuple-map-functions.md | 68 ++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ad40725d680..24b356eca87 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -600,7 +600,7 @@ mapApply(func, map) **Arguments** -- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../data-types/map.md). 
**Returned value** @@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. + +## mapPartialSort + +Sorts the elements of a map in ascending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k2':1,'k3':2,'k1':3} │ +└───────────────────────────────────────────────────────────────────────────┘ +``` ## mapReverseSort(\[func,\], map) @@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; ``` For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). + +## mapPartialReverseSort + +Sorts the elements of a map in descending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialReverseSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). 
+ +**Example** + +``` sql +SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k1':3,'k3':2,'k2':1} │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file From ce785c38d65d3392b1dc91e056b8f669d48fc639 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 12:46:23 +0200 Subject: [PATCH 420/439] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 229eccefa48..d47bbd86347 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1102,6 +1102,8 @@ aggregatefunction aggregatingmergetree aggregatio aggretate +aggthrow +aggThrow aiochclient allocator alphaTokens From d6bf7ca15573ee43a05bab98f40d271d11400c0b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 12:58:21 +0200 Subject: [PATCH 421/439] remove space from aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index d47bbd86347..e62dd57db76 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1102,7 +1102,7 @@ aggregatefunction aggregatingmergetree aggregatio aggretate -aggthrow +aggthrow aggThrow aiochclient allocator From 9dc52217f44580f694e0d6f4460e83be9401ea23 Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 6 Jul 2024 15:07:35 +0000 Subject: [PATCH 422/439] Fix SimpleSquashingTransform --- .../Transforms/AggregatingTransform.cpp | 2 +- .../Transforms/SquashingTransform.cpp | 62 +++++++------------ .../Transforms/SquashingTransform.h | 19 +++--- src/Storages/buildQueryTreeForShard.cpp | 2 +- 4 files changed, 35 insertions(+), 50 deletions(-) diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 65f0612d738..cdbe194cfac 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -783,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. 
diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34b733cde5e..624b41a6d98 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -56,53 +56,37 @@ void SquashingTransform::work() } } -SimpleSquashingTransform::SimpleSquashingTransform( +SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (planned_chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - chunk = squashing.flush(); - if (chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(chunk)); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) - { - if (output.isFinished()) - return Status::Finished; + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - if (!output.canPush()) - return Status::PortFull; - - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; - } - - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; - } - return ISimpleTransform::prepare(); + return std::move(squashed_chunk); } + +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} + +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); +} + + } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index c5b727ac6ec..8b09722ebbd 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -29,22 +30,22 @@ private: Chunk finish_chunk; }; -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. 
-class SimpleSquashingTransform : public ISimpleTransform +class SimpleSquashingChunksTransform : public IInflatingTransform { public: - explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: - Squashing squashing; - - bool finished = false; + SquashingTransform squashing; + Chunk squashed_chunk; }; + } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index ed378169381..84ba92bba00 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); From f1f5dfc83ac3fb1ac2d96cd184283557a327709e Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 6 Jul 2024 16:02:01 +0000 Subject: [PATCH 423/439] fix SimpleSquashingChunksTransform --- src/Processors/Transforms/SquashingTransform.cpp | 8 +++----- src/Processors/Transforms/SquashingTransform.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 624b41a6d98..7b04e4341e0 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -58,14 +58,13 @@ void SquashingTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void SimpleSquashingChunksTransform::consume(Chunk chunk) { - Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + squashed_chunk = squashing.add(std::move(chunk)); } Chunk SimpleSquashingChunksTransform::generate() @@ -83,8 +82,7 @@ bool SimpleSquashingChunksTransform::canGenerate() Chunk SimpleSquashingChunksTransform::getRemaining() { - Block current_block = squashing.add({}); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + squashed_chunk = squashing.flush(); return std::move(squashed_chunk); } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 8b09722ebbd..092f58f2fe0 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -44,7 +44,7 @@ protected: Chunk getRemaining() 
override; private: - SquashingTransform squashing; + Squashing squashing; Chunk squashed_chunk; }; From 06734b351ece7166822a47849596483e899cda7e Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 6 Jul 2024 16:35:54 +0000 Subject: [PATCH 424/439] Revert "fix SimpleSquashingChunksTransform" This reverts commit f1f5dfc83ac3fb1ac2d96cd184283557a327709e. --- src/Processors/Transforms/SquashingTransform.cpp | 8 +++++--- src/Processors/Transforms/SquashingTransform.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 7b04e4341e0..624b41a6d98 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -58,13 +58,14 @@ void SquashingTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } void SimpleSquashingChunksTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } Chunk SimpleSquashingChunksTransform::generate() @@ -82,7 +83,8 @@ bool SimpleSquashingChunksTransform::canGenerate() Chunk SimpleSquashingChunksTransform::getRemaining() { - squashed_chunk = squashing.flush(); + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); return std::move(squashed_chunk); } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 092f58f2fe0..8b09722ebbd 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -44,7 +44,7 @@ protected: Chunk getRemaining() override; private: - Squashing squashing; + SquashingTransform squashing; Chunk squashed_chunk; }; From c195537a1883e153c32971c5180ad30f9aba402a Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 6 Jul 2024 16:41:18 +0000 Subject: [PATCH 425/439] fix SimpleSquashingChunksTransform --- .../Transforms/SquashingTransform.cpp | 134 ++++++++++++++++++ .../Transforms/SquashingTransform.h | 31 +++- 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 624b41a6d98..b5a40c75c5b 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } SquashingTransform::SquashingTransform( @@ -88,5 +89,138 @@ Chunk SimpleSquashingChunksTransform::getRemaining() return std::move(squashed_chunk); } +SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block SquashingLegacy::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Block SquashingLegacy::add(const Block & input_block) +{ + return addImpl(input_block); +} + +/* + * To minimize copying, 
accept two types of argument: const reference for output + * stream, and rvalue reference for input stream, and decide whether to copy + * inside this function. This allows us not to copy Block unless we absolutely + * have to. + */ +template +Block SquashingLegacy::addImpl(ReferenceType input_block) +{ + /// End of input stream. + if (!input_block) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Just read block is already enough. + if (isEnoughSize(input_block)) + { + /// If no accumulated data, return just read block. + if (!accumulated_block) + { + return std::move(input_block); + } + + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_block)) + { + /// Return accumulated data and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + append(std::move(input_block)); + if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Squashed block is not ready. + return {}; +} + + +template +void SquashingLegacy::append(ReferenceType input_block) +{ + if (!accumulated_block) + { + accumulated_block = std::move(input_block); + return; + } + + assert(blocksHaveEqualStructure(input_block, accumulated_block)); + + try + { + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getByPosition(i).column; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } + } + catch (...) + { + /// add() may be called again even after a previous add() threw an exception. + /// Keep accumulated_block in a valid state. + /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will + /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". + accumulated_block.clear(); + throw; + } +} + + +bool SquashingLegacy::isEnoughSize(const Block & block) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const auto & [column, type, name] : block) + { + if (!column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); + + if (!rows) + rows = column->size(); + else if (rows != column->size()) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); + + bytes += column->byteSize(); + } + + return isEnoughSize(rows, bytes); +} + + +bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} + } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 8b09722ebbd..452317e7d5e 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -30,6 +30,35 @@ private: Chunk finish_chunk; }; + +class SquashingLegacy +{ +public: + /// Conditions on rows and bytes are OR-ed. 
If one of them is zero, then corresponding condition is ignored. + SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + /** Add next block and possibly returns squashed block. + * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. + */ + Block add(Block && block); + Block add(const Block & block); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + template + Block addImpl(ReferenceType block); + + template + void append(ReferenceType block); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + class SimpleSquashingChunksTransform : public IInflatingTransform { public: @@ -44,7 +73,7 @@ protected: Chunk getRemaining() override; private: - SquashingTransform squashing; + SquashingLegacy squashing; Chunk squashed_chunk; }; From 262972c0ee47f6782e8e073047f4e8a6f6db3229 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 6 Jul 2024 20:13:37 +0200 Subject: [PATCH 426/439] add makeDate32 and makeDateTime64 --- .../functions/date-time-functions.md | 82 ++++++++++++++++++- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b532e0de8f0..46b1167fa33 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,57 @@ Result: ``` ## makeDate32 -Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day). + +**Syntax** + +```sql +makeDate32(year, [month,] day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +:::note +If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`. +::: + +**Returned values** + +- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md). + +**Examples** + +Create a date from a year, month, and day: + +Query: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +Result: + +```response +2024-01-01 +``` + +Create a Date from a year and day of year: + +Query: + +``` sql +SELECT makeDate32(2024, 100); +``` + +Result: + +```response +2024-04-09 +``` ## makeDateTime @@ -125,12 +175,38 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). +Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision. **Syntax** +```sql +makeDateTime64(year, month, day, hour, minute, second[, precision]) +``` + +**Arguments** + +- `year` — Year (0-9999). 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + ``` sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); +``` + +```response +┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐ +│ 2023-05-15 10:30:45.00779 │ +└─────────────────────────────────────────────────┘ ``` ## timestamp From bbe72326ba567f67f57a7f3d2b2a6535691ab033 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 6 Jul 2024 20:14:23 +0200 Subject: [PATCH 427/439] update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 229eccefa48..43dcbed5c4c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1966,7 +1966,9 @@ macOS mailrugo mailto makeDate +makeDate32 makeDateTime +makeDateTime64 mannWhitneyUTest mannwhitneyutest mapAdd From 0f8b72af25fa8f35c4292d942942409c736ade6c Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 20:44:45 +0200 Subject: [PATCH 428/439] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 229eccefa48..40cdd04bd27 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1980,6 +1980,8 @@ mapExtractKeyLike mapFilter mapFromArrays mapKeys +mapPartialReverseSort +mapPartialSort mapPopulateSeries mapReverseSort mapSort From d70d93f362ebceae4d3c981a84af6974c50c3f0b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 20:45:57 +0200 Subject: [PATCH 429/439] Update aspell-dict.txt From 912218d9fc7fcf12a3ac86ed3d5402d94080d3fd Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 20:46:18 +0200 Subject: [PATCH 430/439] Update 
aspell-dict.txt From 458d4ca8628b82a98a2165c1a86433af0fa6a9e6 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 20:56:47 +0200 Subject: [PATCH 431/439] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 43dcbed5c4c..b5417719c31 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1966,7 +1966,6 @@ macOS mailrugo mailto makeDate -makeDate32 makeDateTime makeDateTime64 mannWhitneyUTest From aea4734edd78eadccc5b30db6d09369a3ede708f Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 6 Jul 2024 21:28:16 +0200 Subject: [PATCH 432/439] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index b5417719c31..229eccefa48 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1967,7 +1967,6 @@ mailrugo mailto makeDate makeDateTime -makeDateTime64 mannWhitneyUTest mannwhitneyutest mapAdd From 84f876f0983ab425f162d0b69092770484392abc Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 6 Jul 2024 20:35:43 +0000 Subject: [PATCH 433/439] Support null map subcolumn for Variant and Dynamic subcolumns --- src/DataTypes/DataTypeDynamic.cpp | 46 +- src/DataTypes/IDataType.cpp | 2 +- .../Serializations/ISerialization.cpp | 8 +- src/DataTypes/Serializations/ISerialization.h | 14 +- .../SerializationDynamicElement.cpp | 21 +- .../SerializationDynamicElement.h | 6 +- .../Serializations/SerializationVariant.cpp | 29 +- .../Serializations/SerializationVariant.h | 2 + .../SerializationVariantElement.cpp | 31 +- .../SerializationVariantElement.h | 12 +- .../SerializationVariantElementNullMap.cpp | 190 +++ .../SerializationVariantElementNullMap.h | 107 ++ .../02941_variant_type_1.reference | 1380 ++++++++--------- .../02941_variant_type_2.reference | 15 - .../0_stateless/02941_variant_type_2.sh | 5 +- .../02941_variant_type_3.reference | 15 - .../0_stateless/02941_variant_type_3.sh | 5 +- .../02941_variant_type_4.reference | 15 - .../0_stateless/02941_variant_type_4.sh | 5 +- .../03040_dynamic_type_alters_1.reference | 858 +++++----- .../03040_dynamic_type_alters_2.reference | 276 ++-- .../03041_dynamic_type_check_table.reference | 72 +- ...03201_variant_null_map_subcolumn.reference | 402 +++++ .../03201_variant_null_map_subcolumn.sh | 44 + ...03202_dynamic_null_map_subcolumn.reference | 57 + .../03202_dynamic_null_map_subcolumn.sh | 62 + 26 files changed, 2299 insertions(+), 1380 deletions(-) create mode 100644 src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp create mode 100644 src/DataTypes/Serializations/SerializationVariantElementNullMap.h create mode 100644 tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference create mode 100755 tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh create mode 100644 tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference create mode 100755 tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..5302cdb18f9 100644 --- 
a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -2,9 +2,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -110,28 +112,58 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa } /// Extract nested subcolumn of requested dynamic subcolumn if needed. - if (!subcolumn_nested_name.empty()) + /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet. + bool is_null_map_subcolumn = subcolumn_nested_name == "null"; + if (is_null_map_subcolumn) + { + res->type = std::make_shared(); + } + else if (!subcolumn_nested_name.empty()) { res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); if (!res) return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); - res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). + bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); + if (!is_null_map_subcolumn && make_subcolumn_nullable) + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) { if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to /// create full subcolumn from variant according to discriminators. const auto & variant_column = assert_cast(*data.column).getVariantColumn(); - auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); - res->column = creator.create(res->column); + std::unique_ptr creator; + if (is_null_map_subcolumn) + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator)); + else + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator), + make_subcolumn_nullable); + res->column = creator->create(res->column); + } + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + else if (is_null_map_subcolumn) + { + /// Fill null map with 1 when there is no such Dynamic subcolumn. + auto column = ColumnUInt8::create(); + assert_cast(*column).getData().resize_fill(data.column->size(), 1); + res->column = std::move(column); } else { - /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. 
auto column = res->type->createColumn(); column->insertManyDefaults(data.column->size()); res->column = std::move(column); diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1c9715bbf53..1cb64b65d3a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) { - has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData(); }; forEachSubcolumn(callback, data); return has_dynamic_subcolumns; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..7642a6619b3 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const if (type == VariantElement) return fmt::format("VariantElement({})", variant_element_name); + if (type == VariantElementNullMap) + return fmt::format("VariantElementNullMap({}.null)", variant_element_name); + return String(magic_enum::enum_name(type)); } @@ -195,6 +198,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == Substream::VariantElementNullMap) + stream_name += "." + it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; } @@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref return path[last_elem].type == Substream::NullMap || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes - || path[last_elem].type == Substream::VariantElement; + || path[last_elem].type == Substream::VariantElement + || path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 89e2079490e..6007eca94d4 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -184,6 +184,7 @@ public: VariantOffsets, VariantElements, VariantElement, + VariantElementNullMap, DynamicData, DynamicStructure, @@ -436,6 +437,9 @@ protected: template State * checkAndGetState(const StatePtr & state) const; + template + static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization); + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; @@ -446,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type; template State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + return checkAndGetState(state, this); +} + +template +State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization) { if (!state) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Got empty state for {}", demangle(typeid(*this).name())); + "Got empty state for {}", demangle(typeid(*serialization).name())); auto * 
state_concrete = typeid_cast(state.get()); if (!state_concrete) @@ -457,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const auto & state_ref = *state; throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid State for {}. Expected: {}, got {}", - demangle(typeid(*this).name()), + demangle(typeid(*serialization).name()), demangle(typeid(State).name()), demangle(typeid(state_ref).name())); } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index dafd6d663b0..211f0ac9377 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + if (is_null_map_subcolumn) + dynamic_element_state->variant_serialization = std::make_shared(dynamic_element_name, *global_discr); + else + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } @@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( SubstreamsCache * cache) const { if (!state) + { + if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } + return; + } auto * dynamic_element_state = checkAndGetState(state); @@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } + else if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } else { auto mutable_column = result_column->assumeMutable(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 2ddc3324139..127d14a55e0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,11 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. 
String dynamic_element_name; + bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) - : SerializationWrapper(nested_) - , dynamic_element_name(dynamic_element_name_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8cdd312a707..e4d71e84cc7 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -71,10 +72,16 @@ void SerializationVariant::enumerateStreams( for (size_t i = 0; i < variants.size(); ++i) { - settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; + settings.path.back().creator = std::make_shared( + local_discriminators, + variant_names[i], + i, + column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, + !type || type->canBeInsideNullable() || type->lowCardinality()); auto variant_data = SubstreamData(variants[i]) - .withType(type_variant ? type_variant->getVariant(i) : nullptr) + .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr); @@ -85,6 +92,24 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } + /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null. + /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable + /// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization + /// SerializationVariantElementNullMap. + auto null_map_data = SubstreamData(std::make_shared>()) + .withType(type_variant ? std::make_shared() : nullptr) + .withColumn(column_variant ? ColumnUInt8::create() : nullptr); + + for (size_t i = 0; i < variants.size(); ++i) + { + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? 
column_variant->localDiscriminatorByGlobal(i) : i); + settings.path.push_back(Substream::VariantElementNullMap); + settings.path.back().variant_element_name = variant_names[i]; + settings.path.back().data = null_map_data; + callback(settings.path); + settings.path.pop_back(); + } + settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index f777ef1203d..af89632cf81 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -161,6 +162,7 @@ public: private: friend SerializationVariantElement; + friend SerializationVariantElementNullMap; void addVariantElementToPath(SubstreamPath & path, size_t i) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 46f9194baa9..8ceab17cba4 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -115,7 +115,14 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); else - variant_limit = deserializeCompactDiscriminators(variant_element_state->discriminators, limit, discriminators_stream, settings.continuous_reading, *variant_element_state); + variant_limit = deserializeCompactDiscriminators( + variant_element_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_state->discriminators_state, + this); addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } @@ -224,12 +231,14 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( size_t SerializationVariantElement::deserializeCompactDiscriminators( DB::ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, size_t limit, DB::ReadBuffer * stream, bool continuous_reading, - DeserializeBinaryBulkStateVariantElement & variant_element_state) const + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization) { - auto * discriminators_state = checkAndGetState(variant_element_state.discriminators_state); + auto * discriminators_state = checkAndGetState(discriminators_state_, serialization); auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); auto & discriminators_data = discriminators.getData(); @@ -290,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_) + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_) : local_discriminators(local_discriminators_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) + , make_nullable(make_nullable_) { } DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const { - return 
makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const @@ -313,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Case when original Variant column contained only one non-empty variant and no NULLs. /// In this case just use this variant. if (prev->size() == local_discriminators->size()) - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; /// If this variant is empty, fill result column with default values. if (prev->empty()) { - auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty(); + auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty(); res->insertManyDefaults(local_discriminators->size()); return res; } @@ -333,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); - /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), + /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). - if (prev->lowCardinality()) + if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); res_column->expand(null_map, /*inverted = */ true); - if (res_column->canBeInsideNullable()) + if (make_nullable && prev->canBeInsideNullable()) { auto null_map_col = ColumnUInt8::create(); null_map_col->getData() = std::move(null_map); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index f6d4e069633..69101aea0f5 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -9,6 +9,7 @@ namespace DB { class SerializationVariant; +class SerializationVariantElementNullMap; /// Serialization for Variant element when we read it as a subcolumn. 
class SerializationVariantElement final : public SerializationWrapper @@ -66,12 +67,14 @@ public: const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; + bool make_nullable; VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_); + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; @@ -79,15 +82,18 @@ public: }; private: friend SerializationVariant; + friend SerializationVariantElementNullMap; struct DeserializeBinaryBulkStateVariantElement; - size_t deserializeCompactDiscriminators( + static size_t deserializeCompactDiscriminators( ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, size_t limit, ReadBuffer * stream, bool continuous_reading, - DeserializeBinaryBulkStateVariantElement & variant_element_state) const; + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization); void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp new file mode 100644 index 00000000000..4e355fbb8ef --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState +{ + /// During deserialization discriminators streams can be shared. + /// For example we can read several variant elements together: "select v.UInt32, v.String.null from table", + /// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table". + /// To read the same column from the same stream more than once we use substream cache, + /// but this cache stores the whole column, not only the current range. + /// During deserialization of variant elements or their subcolumns discriminators column is not stored + /// in the result column, so we need to store them inside deserialization state, so we can use + /// substream cache correctly. + ColumnPtr discriminators; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; +}; + +void SerializationVariantElementNullMap::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData &) const +{ + /// We will need stream for discriminators during deserialization. 
+ settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix( + const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + + auto variant_element_null_map_state = std::make_shared(); + variant_element_null_map_state->discriminators_state = std::move(discriminators_state); + state = std::move(variant_element_null_map_state); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams( + const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + /// Deserialize discriminators from Variant column. + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr; + std::optional variant_limit; + if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + variant_element_null_map_state->discriminators = cached_discriminators; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState( + variant_element_null_map_state->discriminators_state); + + /// If we started to read a new column, reinitialize discriminators column in deserialization state. + if (!variant_element_null_map_state->discriminators || result_column->empty()) + variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); + + /// Deserialize discriminators according to serialization mode. 
+ if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
+ SerializationNumber().deserializeBinaryBulk(
+ *variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
+ else
+ variant_limit = SerializationVariantElement::deserializeCompactDiscriminators(
+ variant_element_null_map_state->discriminators,
+ variant_discriminator,
+ limit,
+ discriminators_stream,
+ settings.continuous_reading,
+ variant_element_null_map_state->discriminators_state,
+ this);
+
+ addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators);
+ }
+ else
+ {
+ /// There is no such stream or cached data, which means that there is no Variant column in this part (it could happen after ALTER TABLE ADD COLUMN).
+ /// In such cases columns are filled with default values, but for a null-map column the default value should be 1, not 0, so fill the column with 1 here.
+ MutableColumnPtr mutable_column = result_column->assumeMutable();
+ auto & data = assert_cast(*mutable_column).getData();
+ data.resize_fill(data.size() + limit, 1);
+ settings.path.pop_back();
+ return;
+ }
+ settings.path.pop_back();
+
+ MutableColumnPtr mutable_column = result_column->assumeMutable();
+ auto & data = assert_cast(*mutable_column).getData();
+ /// Check if there is no such variant in the read range.
+ if (variant_limit && *variant_limit == 0)
+ {
+ data.resize_fill(data.size() + limit, 1);
+ }
+ /// Check if there is only our variant in the read range.
+ else if (variant_limit && *variant_limit == limit)
+ {
+ data.resize_fill(data.size() + limit, 0);
+ }
+ /// Iterate through new discriminators to calculate the null map of our variant.
+ else
+ {
+ const auto & discriminators_data
+ = assert_cast(*variant_element_null_map_state->discriminators).getData();
+ size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit;
+ for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
+ data.push_back(discriminators_data[i] != variant_discriminator);
+ }
+}
+
+SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator(
+ const ColumnPtr & local_discriminators_,
+ const String & variant_element_name_,
+ ColumnVariant::Discriminator global_variant_discriminator_,
+ ColumnVariant::Discriminator local_variant_discriminator_)
+ : local_discriminators(local_discriminators_)
+ , variant_element_name(variant_element_name_)
+ , global_variant_discriminator(global_variant_discriminator_)
+ , local_variant_discriminator(local_variant_discriminator_)
+{
+}
+
+DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const
+{
+ return std::make_shared();
+}
+
+SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const
+{
+ return std::make_shared(variant_element_name, global_variant_discriminator);
+}
+
+ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const
+{
+ /// Iterate through discriminators and create null-map for our variant.
+ auto null_map_col = ColumnUInt8::create(); + auto & null_map_data = null_map_col->getData(); + null_map_data.reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_data.push_back(local_discr != local_variant_discriminator); + + return null_map_col; +} + + +} diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.h b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h new file mode 100644 index 00000000000..cd81b445189 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class SerializationVariant; +class SerializationVariantElement; + +/// Serialization for Variant element null map when we read it as a subcolumn. +/// For example, variant.UInt64.null. +/// It requires separate serialization because there is no actual Nullable column +/// and we should construct null map from variant discriminators. +/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement, +/// but differs in that there is no need to read the actual data of the variant, only discriminators. +class SerializationVariantElementNullMap final : public SimpleTextSerialization +{ +public: + SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_) + : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, 
const FormatSettings &, bool) const override { throwNoSerialization(); } + + struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr local_discriminators; + const String variant_element_name; + const ColumnVariant::Discriminator global_variant_discriminator; + const ColumnVariant::Discriminator local_variant_discriminator; + + VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_); + + DataTypePtr create(const DataTypePtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + }; +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn"); + } + + friend SerializationVariant; + friend SerializationVariantElement; + + /// To be able to deserialize Variant element null map as a subcolumn + /// we need variant element type name and global discriminator. + String variant_element_name; + ColumnVariant::Discriminator variant_discriminator; + +}; + +} diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference index 8a6e77d4f6d..53e5a556821 100644 --- a/tests/queries/0_stateless/02941_variant_type_1.reference +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -91,42 +91,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -145,21 +145,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -256,42 +256,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -310,23 +310,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -421,42 +421,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -475,23 +475,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree compact @@ -587,42 +587,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -641,21 +641,21 @@ lc_str_2 [0] [0,1] [0,1,2] 
-\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -751,42 +751,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -805,21 +805,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -916,42 +916,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -970,23 +970,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -1080,42 +1080,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1134,23 +1134,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -1245,42 +1245,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1299,23 +1299,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -1409,42 +1409,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1463,23 +1463,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree wide @@ -1575,42 +1575,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1629,21 +1629,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1739,42 +1739,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 
-\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1793,21 +1793,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1904,42 +1904,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1958,23 +1958,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -2068,42 +2068,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -2122,23 +2122,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -2233,42 +2233,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2287,23 +2287,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -2397,42 +2397,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2451,22 +2451,22 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 20a5176cb5e..1d9126aa230 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -6,9 +6,6 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test4 insert test4 select @@ -17,18 +14,12 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test4 insert test4 select @@ -37,15 +28,9 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index f43cd2bb0d6..8453bce98dc 100755 --- 
a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -33,13 +33,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference index 1ccdb3acdff..d28aa7a594b 100644 --- a/tests/queries/0_stateless/02941_variant_type_3.reference +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -6,9 +6,6 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test5 insert test5 select @@ -17,18 +14,12 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test5 insert test5 select @@ -37,15 +28,9 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index f4b2b304f56..990eb25b5be 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -35,13 +35,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference index e13d5820343..d1630b04347 100644 --- a/tests/queries/0_stateless/02941_variant_type_4.reference +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -6,9 +6,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test6 insert @@ -18,9 +15,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -28,9 +22,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 
----------------------------------------------------------------------------------------------------------- MergeTree wide test6 insert @@ -40,9 +31,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -50,7 +38,4 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f9a16847864..b8f619694b0 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -29,13 +29,10 @@ function test6_select() select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); - select v.\`Array(UInt64)\`.size0 from test format Null; - select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + select v.\`Array(UInt64)\`.size0 from test format Null;" echo "-----------------------------------------------------------------------------------------------------------" } diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..a9c785d1e48 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -2,525 +2,525 @@ Memory initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert 
after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 4 UInt64 7 String 8 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N 
-19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree compact initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N 
\N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N 
-12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree wide initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N 
\N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 
\N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference index 18a181464e9..f7c00bd8c44 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -2,181 +2,181 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N 
\N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] MergeTree wide initial insert alter add column 3 None -0 0 \N 
\N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 
5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference index b1ea186a917..0dab4ea0d20 100644 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -2,55 +2,55 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference new file mode 100644 index 00000000000..8565fe3d0fa --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -0,0 +1,402 @@ +Memory +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 
[] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree compact +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree wide +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 
0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh new file mode 100755 index 00000000000..8231691e184 --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" + + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference new file mode 100644 index 00000000000..8740726c7ef --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree 
wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh new file mode 100755 index 00000000000..aa06e48376c --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.Int8.null, d.Date.null, d.\`Array(String)\`.null from test format Null" + $CH_CLIENT -q "select d, d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q 
"select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" From def7408b48f0e6b557d143a2efedc243e4416dc8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 6 Jul 2024 22:57:41 +0200 Subject: [PATCH 434/439] Fix typo --- .../Serializations/SerializationVariantElementNullMap.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp index 4e355fbb8ef..f30da4fecf9 100644 --- a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -118,8 +118,8 @@ void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStream } else { - /// There is no such stream or cached data, it means that there is no Variant column in this part (it could happend after alter table add column). - /// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead + /// There is no such stream or cached data, it means that there is no Variant column in this part (it could happen after alter table add column). + /// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead. MutableColumnPtr mutable_column = result_column->assumeMutable(); auto & data = assert_cast(*mutable_column).getData(); data.resize_fill(data.size() + limit, 1); From e64e9f51ece90fcbfc98234ac65cc7e6bac2e2bd Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Sun, 7 Jul 2024 14:57:26 +0200 Subject: [PATCH 435/439] Update README.md removing completed events. release call update on monday --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index dc253d4db2d..3d7d7441081 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,6 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. 
Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 -* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 From 5aedbac37d482644523ae3c4c720971cea6c5502 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 7 Jul 2024 16:52:29 +0200 Subject: [PATCH 436/439] Fix 01246_buffer_flush flakiness - reduce min_time for Buffer's min test - rewrite the test to .sh to avoid extra sleeping time (with .sql we have to wait the max time) - change the assertion for min test, the time there should not exceed max time (100 seconds), this should fix with test flakiness [1] even after [2]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/0/76119a4567ce2ac9c0aff715c1a9ba2607e806e0/stateless_tests__tsan__[3_5].html [2]: https://github.com/ClickHouse/ClickHouse/pull/65310 Signed-off-by: Azat Khuzhin --- .../queries/0_stateless/01246_buffer_flush.sh | 76 +++++++++++++++++++ .../0_stateless/01246_buffer_flush.sql | 50 ------------ 2 files changed, 76 insertions(+), 50 deletions(-) create mode 100755 tests/queries/0_stateless/01246_buffer_flush.sh delete mode 100644 tests/queries/0_stateless/01246_buffer_flush.sql diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh new file mode 100755 index 00000000000..1ca953c80d9 --- /dev/null +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function elapsed_sec() +{ + local expr=$1 && shift + local start end + start=$(date +%s.%N) + while ! 
eval "$expr"; do + sleep 0.5 + done + end=$(date +%s.%N) + $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_01256; + drop table if exists buffer_01256; + + create table data_01256 as system.numbers Engine=Memory(); +" + +echo "min" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 2, 100, /* time */ + 4, 100, /* rows */ + 1, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" +[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "max" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 2, /* time */ + 0, 100, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "direct" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 100, /* time */ + 0, 9, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 10; + select count() from data_01256; +" + +echo "drop" +$CLICKHOUSE_CLIENT -nm -q " + insert into buffer_01256 select * from system.numbers limit 10; + drop table if exists buffer_01256; + select count() from data_01256; +" + +$CLICKHOUSE_CLIENT -q "drop table data_01256" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql deleted file mode 100644 index 66f93371c29..00000000000 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ /dev/null @@ -1,50 +0,0 @@ --- Tags: no-fasttest - -SET function_sleep_max_microseconds_per_block = 4000000; - -drop table if exists data_01256; -drop table if exists buffer_01256; - -create table data_01256 as system.numbers Engine=Memory(); - -select 'min'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 5, 100, /* time */ - 4, 100, /* rows */ - 1, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) -select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6; -select count() from data_01256; -drop table buffer_01256; - -select 'max'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 2, /* time */ - 0, 100, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; -select count() from data_01256; -drop table buffer_01256; - -select 'direct'; 
-create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 100, /* time */ - 0, 9, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 10; -select count() from data_01256; - -select 'drop'; -insert into buffer_01256 select * from system.numbers limit 10; -drop table if exists buffer_01256; -select count() from data_01256; - -drop table data_01256; From 3fa1fd321fe3e2be5155b87795912f4ab3ca8f48 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 07:38:07 +0000 Subject: [PATCH 437/439] Fix typo --- docs/en/operations/system-tables/metrics.md | 2 +- src/Common/CurrentMetrics.cpp | 2 +- src/Coordination/KeeperConstants.cpp | 2 +- src/Coordination/KeeperDispatcher.cpp | 14 +++++++------- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 83ce817b7db..f253b164e2a 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -357,7 +357,7 @@ Number of currently running inserts to Kafka Number of alive connections -### KeeperOutstandingRequets +### KeeperOutstandingRequests Number of outstanding requests diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 8516a88c7af..7c97e73f278 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -267,7 +267,7 @@ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ M(S3Requests, "S3 requests count") \ M(KeeperAliveConnections, "Number of alive connections") \ - M(KeeperOutstandingRequets, "Number of outstanding requests") \ + M(KeeperOutstandingRequests, "Number of outstanding requests") \ M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \ M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \ diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index b4241235cc7..7589e3393be 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -372,7 +372,7 @@ extern const std::vector keeper_profile_events M(AsynchronousReadWait) \ M(S3Requests) \ M(KeeperAliveConnections) \ - M(KeeperOutstandingRequets) \ + M(KeeperOutstandingRequests) \ M(ThreadsInOvercommitTracker) \ M(IOUringPendingEvents) \ M(IOUringInFlightEvents) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index f36b1ef151f..6f57fa6d2e2 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -28,7 +28,7 @@ namespace CurrentMetrics { extern const Metric KeeperAliveConnections; - extern const Metric KeeperOutstandingRequets; + extern const Metric KeeperOutstandingRequests; } namespace ProfileEvents @@ -139,7 +139,7 @@ void KeeperDispatcher::requestThread() { if (requests_queue->tryPop(request, max_wait)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); if (shutdown_called) break; @@ -171,7 +171,7 @@ void KeeperDispatcher::requestThread() /// Trying to get batch requests as fast as possible if (requests_queue->tryPop(request)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); /// Don't 
append read request into batch, we have to process them separately if (!coordination_settings->quorum_reads && request.request->isReadRequest()) { @@ -419,7 +419,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ { throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout"); } - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); return true; } @@ -543,7 +543,7 @@ void KeeperDispatcher::shutdown() /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); auto response = request_for_session.request->makeResponse(); response->error = Coordination::Error::ZSESSIONEXPIRED; setResponse(request_for_session.session_id, response); @@ -670,7 +670,7 @@ void KeeperDispatcher::sessionCleanerTask() }; if (!requests_queue->push(std::move(request_info))) LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); /// Remove session from registered sessions finishSession(dead_session); @@ -794,7 +794,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) /// Push new session request to queue if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push session id request to queue within session timeout"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot receive session id within session timeout"); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 02bc520743f..cbda66b7bf9 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -449,7 +449,7 @@ Kahan Kaser KeeperAliveConnections KeeperMap -KeeperOutstandingRequets +KeeperOutstandingRequests Kerberos Khanna KittenHouse From 5812a65ea091b4455123d5e29a8c161c29c5fb8b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 07:46:57 +0000 Subject: [PATCH 438/439] Rename test (query cache tests start with 02494) --- ...ty_tuple.reference => 02494_query_cache_empty_tuple.reference} | 0 ...ry_cache_empty_tuple.sql => 02494_query_cache_empty_tuple.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03201_query_cache_empty_tuple.reference => 02494_query_cache_empty_tuple.reference} (100%) rename tests/queries/0_stateless/{03201_query_cache_empty_tuple.sql => 02494_query_cache_empty_tuple.sql} (100%) diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference similarity index 100% rename from tests/queries/0_stateless/03201_query_cache_empty_tuple.reference rename to tests/queries/0_stateless/02494_query_cache_empty_tuple.reference diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql similarity index 
100% rename from tests/queries/0_stateless/03201_query_cache_empty_tuple.sql rename to tests/queries/0_stateless/02494_query_cache_empty_tuple.sql From 21e7ef6e42770bbd3d544dab1e8a78e980a0de0e Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Mon, 8 Jul 2024 13:44:41 +0200 Subject: [PATCH 439/439] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3d7d7441081..98f9108f14c 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 +* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 ## Upcoming Events