From 413e190916303bbb1ddf8556ef15d27e0f8a0354 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 10 Jan 2024 15:10:39 +0000 Subject: [PATCH 001/273] enable optimize_functions_to_subcolumns by default --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0e6da579b10..5200e9f775a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -580,7 +580,7 @@ class IColumn; M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \ - M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 
'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ From c1fc12fd35ab3a41edceb68ec3679bed11a69577 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 10 Jan 2024 22:51:06 +0000 Subject: [PATCH 002/273] some fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 223 +++++++++++++++--- .../RewriteFunctionToSubcolumnVisitor.cpp | 2 +- .../RewriteFunctionToSubcolumnVisitor.h | 1 + src/Interpreters/TreeOptimizer.cpp | 41 +++- 4 files changed, 231 insertions(+), 36 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index cd635f87e0e..932e715a935 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -23,48 +23,194 @@ namespace DB namespace { -class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext +std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) +{ + auto * function_node = node->as(); + if (!function_node) + return {}; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); + if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) + return {}; + auto * first_argument_column_node = function_arguments_nodes.front()->as(); + if (!first_argument_column_node) + return {}; + + auto column_source = first_argument_column_node->getColumnSource(); + auto * table_node = column_source->as(); + if (!table_node) + return {}; + + if (!table_node->getStorageSnapshot()) + return {}; + + if (!table_node->getStorage()->supportsSubcolumns()) + return {}; + + return std::make_tuple(function_node, first_argument_column_node, 
table_node); +} + +class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void enterImpl(QueryTreeNodePtr & node) const + struct Data { - if (!getSettings().optimize_functions_to_subcolumns) + std::unordered_set all_key_columns; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + Data getData() const { return data; } + + void enterImpl(const QueryTreeNodePtr & node) + { + if (auto * column_node = node->as()) + { + enterImpl(*column_node); return; + } - auto * function_node = node->as(); - if (!function_node) + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); + if (function_node && first_argument_node && table_node) + { + enterImpl(*function_node, *first_argument_node, *table_node); return; + } + } - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - size_t function_arguments_nodes_size = function_arguments_nodes.size(); - - if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) - return; - - auto * first_argument_column_node = function_arguments_nodes.front()->as(); - - if (!first_argument_column_node) - return; - - auto column_source = first_argument_column_node->getColumnSource(); +private: + void enterImpl(const ColumnNode & column_node) + { + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) return; - const auto & storage = table_node->getStorage(); - if (!storage->supportsSubcolumns()) - return; + auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column_node.getColumnName()}); - auto column = first_argument_column_node->getColumn(); + ++data.indentifiers_count[qualified_name]; + + if (processed_tables.emplace(table_name).second) + { + 
const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata; + + auto add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + data.all_key_columns.insert(identifier); + } + }; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + add_key_columns(primary_key_columns); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + } + + void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) + { + const auto & function_arguments_nodes = function_node.getArguments().getNodes(); + const auto & function_name = function_node.getFunctionName(); + + auto column = first_argument_column_node.getColumn(); WhichDataType column_type(column.type); + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (function_arguments_nodes.size() == 1) + { + if (column_type.isArray()) + { + if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isNullable()) + { + if (function_name == "isNull" || function_name == "isNotNull") + ++data.optimized_identifiers_count[qualified_name]; + } + else if (column_type.isMap()) + { + if (function_name == "mapKeys" || function_name == "mapValues") + 
++data.optimized_identifiers_count[qualified_name]; + } + } + else if (function_arguments_nodes.size() == 2) + { + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) + { + /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` + * with `tuple_argument.column_name`. + */ + const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); + const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + + if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + ++data.optimized_identifiers_count[qualified_name]; + } + else if (function_name == "mapContains" && column_type.isMap()) + { + ++data.optimized_identifiers_count[qualified_name]; + } + } + } + + Data data; + NameSet processed_tables; +}; + + +class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set identifiers_to_optimize_) + : Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + + void enterImpl(QueryTreeNodePtr & node) const + { + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); + if (!function_node || !first_argument_column_node || !table_node) + return; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); const auto & function_name = function_node->getFunctionName(); - if (function_arguments_nodes_size == 1) + auto column = first_argument_column_node->getColumn(); + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + + auto table_name = 
table_node->getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); + + if (!identifiers_to_optimize.contains(qualified_name)) + return; + + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) { @@ -72,7 +218,6 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; - node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -106,7 +251,6 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; - node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") @@ -140,10 +284,9 @@ public: } } } - else + else if (function_arguments_nodes.size() == 2) { const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` @@ -193,6 +336,8 @@ public: } private: + std::unordered_set identifiers_to_optimize; + inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); @@ -204,8 +349,26 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - FunctionToSubcolumnsVisitor visitor(context); - visitor.visit(query_tree_node); + if (!context->getSettingsRef().optimize_functions_to_subcolumns) + return; + + std::unordered_set identifiers_to_optimize; + + { + FunctionToSubcolumnsVisitorFirstPass visitor(context); + visitor.visit(query_tree_node); + + auto data = visitor.getData(); + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + 
identifiers_to_optimize.insert(identifier); + } + + if (!identifiers_to_optimize.empty()) + { + FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); + visitor.visit(query_tree_node); + } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 506fa13b7ba..5747ce5a3a1 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -78,7 +78,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage)) + if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) return; const auto & column_type = columns.get(name_in_storage).type; diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 4d064bdee10..3c945da92ec 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -17,6 +17,7 @@ public: void visit(ASTFunction & function, ASTPtr & ast) const; StorageMetadataPtr metadata_snapshot; + IdentifierNameSet forbidden_identifiers; }; using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 729e2ed6007..24599ed0044 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -656,10 +656,41 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot) +void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - RewriteFunctionToSubcolumnVisitor::Data 
data{metadata_snapshot}; - RewriteFunctionToSubcolumnVisitor(data).visit(query); + if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + return; + + const auto & metadata_snapshot = result.storage_snapshot->metadata; + // const auto & select_query = assert_cast(*query); + + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + // if (select_query.final()) + // return; + + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; + // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + + IdentifierNameSet forbidden_identifiers; + + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); + forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + } + + RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; + RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) @@ -726,8 +757,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) - 
optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); + if (settings.optimize_functions_to_subcolumns) + optimizeFunctionsToSubcolumns(query, result); /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) From f79202bd532594cd441be928a11691a51fe88e65 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 16:12:41 +0000 Subject: [PATCH 003/273] some fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 57 ++++++++++++------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 932e715a935..d901692ba27 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -32,19 +33,14 @@ std::tuple getTypedNodesForOptimizati auto & function_arguments_nodes = function_node->getArguments().getNodes(); if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2) return {}; + auto * first_argument_column_node = function_arguments_nodes.front()->as(); if (!first_argument_column_node) return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node) - return {}; - - if (!table_node->getStorageSnapshot()) - return {}; - - if (!table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -58,6 +54,7 @@ public: struct Data { + bool has_final = false; std::unordered_set all_key_columns; std::unordered_map indentifiers_count; std::unordered_map optimized_identifiers_count; @@ -67,12 +64,31 @@ public: void enterImpl(const QueryTreeNodePtr & 
node) { + if (data.has_final) + return; + if (auto * column_node = node->as()) { enterImpl(*column_node); return; } + if (auto * table_node = node->as()) + { + if (table_node->hasTableExpressionModifiers() + && table_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + + if (auto * table_function_node = node->as()) + { + if (table_function_node->hasTableExpressionModifiers() + && table_function_node->getTableExpressionModifiers()->hasFinal()) + data.has_final = true; + return; + } + auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); if (function_node && first_argument_node && table_node) { @@ -159,9 +175,6 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. 
- */ const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); @@ -352,22 +365,22 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr if (!context->getSettingsRef().optimize_functions_to_subcolumns) return; + FunctionToSubcolumnsVisitorFirstPass first_visitor(context); + first_visitor.visit(query_tree_node); + auto data = first_visitor.getData(); + + if (data.has_final) + return; + std::unordered_set identifiers_to_optimize; - - { - FunctionToSubcolumnsVisitorFirstPass visitor(context); - visitor.visit(query_tree_node); - - auto data = visitor.getData(); - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - } + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); if (!identifiers_to_optimize.empty()) { - FunctionToSubcolumnsVisitorSecondPass visitor(std::move(context), std::move(identifiers_to_optimize)); - visitor.visit(query_tree_node); + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } From cb4c78af59abca8788dfbd880fa6b7042fca3e6c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jan 2024 23:13:04 +0000 Subject: [PATCH 004/273] fix optimize_functions_to_subcolumns with old analyzer --- .../Passes/FunctionToSubcolumnsPass.cpp | 131 ++++++++++-------- src/DataTypes/DataTypeTuple.cpp | 4 +- .../RewriteFunctionToSubcolumnVisitor.cpp | 126 +++++++++++++---- .../RewriteFunctionToSubcolumnVisitor.h | 34 ++++- src/Interpreters/TreeOptimizer.cpp | 50 +++++-- 
.../02286_tuple_numeric_identifier.sql | 6 +- 6 files changed, 238 insertions(+), 113 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index d901692ba27..82d50f5fdb1 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -40,7 +40,7 @@ std::tuple getTypedNodesForOptimizati auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorageSnapshot() || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node || !table_node->getStorage()->supportsSubcolumns()) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); @@ -67,28 +67,18 @@ public: if (data.has_final) return; + if (auto * table_node = node->as()) + { + enterImpl(*table_node); + return; + } + if (auto * column_node = node->as()) { enterImpl(*column_node); return; } - if (auto * table_node = node->as()) - { - if (table_node->hasTableExpressionModifiers() - && table_node->getTableExpressionModifiers()->hasFinal()) - data.has_final = true; - return; - } - - if (auto * table_function_node = node->as()) - { - if (table_function_node->hasTableExpressionModifiers() - && table_function_node->getTableExpressionModifiers()->hasFinal()) - data.has_final = true; - return; - } - auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node); if (function_node && first_argument_node && table_node) { @@ -98,6 +88,45 @@ public: } private: + Data data; + NameSet processed_tables; + + void enterImpl(const TableNode & table_node) + { + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + { + data.has_final = true; + return; + } + + auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + if (processed_tables.emplace(table_name).second) + return; + + auto 
add_key_columns = [&](const auto & key_columns) + { + for (const auto & column_name : key_columns) + { + Identifier identifier({table_name, column_name}); + data.all_key_columns.insert(identifier); + } + }; + + const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; + + const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); + add_key_columns(primary_key_columns); + + const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + add_key_columns(partition_key_columns); + + for (const auto & index : metadata_snapshot->getSecondaryIndices()) + { + const auto & index_columns = index.expression->getRequiredColumns(); + add_key_columns(index_columns); + } + } + void enterImpl(const ColumnNode & column_node) { auto column_source = column_node.getColumnSource(); @@ -109,36 +138,6 @@ private: Identifier qualified_name({table_name, column_node.getColumnName()}); ++data.indentifiers_count[qualified_name]; - - if (processed_tables.emplace(table_name).second) - { - const auto & metadata_snapshot = table_node->getStorageSnapshot()->metadata; - - auto add_key_columns = [&](const auto & key_columns) - { - for (const auto & column_name : key_columns) - { - Identifier identifier({table_name, column_name}); - data.all_key_columns.insert(identifier); - } - }; - - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - add_key_columns(partition_key_columns); - - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - const auto & index_columns = index.expression->getRequiredColumns(); - add_key_columns(index_columns); - } - } } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) @@ -187,9 +186,6 @@ private: } } } - - Data data; - NameSet processed_tables; }; @@ -214,15 +210,15 @@ public: const auto & function_name = function_node->getFunctionName(); auto column = first_argument_column_node->getColumn(); - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); - Identifier qualified_name({table_name, column.name}); + Identifier qualified_name({table_name, column.name}); if (!identifiers_to_optimize.contains(qualified_name)) return; + auto column_source = first_argument_column_node->getColumnSource(); + WhichDataType column_type(column.type); + if (function_arguments_nodes.size() == 1) { if (column_type.isArray()) @@ -369,19 +365,36 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. if (data.has_final) return; + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. 
+ /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + std::unordered_set identifiers_to_optimize; for (const auto & [identifier, count] : data.optimized_identifiers_count) if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) identifiers_to_optimize.insert(identifier); - if (!identifiers_to_optimize.empty()) - { - FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); - second_visitor.visit(query_tree_node); - } + if (identifiers_to_optimize.empty()) + return; + + FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize)); + second_visitor.visit(query_tree_node); } } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 9cce59b0dca..6edbb8b27eb 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; - extern const int ILLEGAL_INDEX; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; } @@ -270,7 +270,7 @@ std::optional DataTypeTuple::tryGetPositionByName(const String & name) c String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) - throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) if out range ([1, {}])", i, names.size()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index of tuple element ({}) is out 
range ([1, {}])", i, names.size()); return names[i - 1]; } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 5747ce5a3a1..2a235ae31e4 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -63,42 +63,104 @@ const std::unordered_map getColumnFromArgumentsToOptimize( + const ASTs & arguments, + const StorageMetadataPtr & metadata_snapshot) { - const auto & arguments = function.arguments->children; if (arguments.empty() || arguments.size() > 2) - return; + return {}; const auto * identifier = arguments[0]->as(); if (!identifier) - return; + return {}; const auto & columns = metadata_snapshot->getColumns(); const auto & name_in_storage = identifier->name(); - if (!columns.has(name_in_storage) || forbidden_identifiers.contains(name_in_storage)) - return; + if (!columns.has(name_in_storage)) + return {}; const auto & column_type = columns.get(name_in_storage).type; - TypeIndex column_type_id = column_type->getTypeId(); + return NameAndTypePair{name_in_storage, column_type}; +} + +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data) +{ + if (const auto * identifier = ast->as()) + { + ++data.indentifiers_count[identifier->name()]; + return; + } + + if (const auto * function = ast->as()) + { + visit(*function, data); + return; + } +} + +void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data) +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); + + if (arguments.size() == 1) + { + auto it = unary_function_to_subcolumn.find(function.name); + if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + 
++data.optimized_identifiers_count[column->name]; + } + else + { + if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) + { + const auto * literal = arguments[1]->as(); + if (!literal) + return; + + auto value_type = literal->value.getType(); + if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } + else + { + auto it = binary_function_to_subcolumn.find(function.name); + if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + ++data.optimized_identifiers_count[column->name]; + } + } +} + +void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const +{ + const auto & arguments = function.arguments->children; + auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot); + if (!column) + return; + + auto column_type_id = column->type->getTypeId(); const auto & alias = function.tryGetAlias(); if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name); - ast->setAlias(alias); - } - } + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name); + ast->setAlias(alias); } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -110,30 +172,34 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) auto value_type = literal->value.getType(); if (value_type == Field::Types::UInt64) { - const auto & type_tuple = assert_cast(*column_type); + const auto & type_tuple = 
assert_cast(*column->type); auto index = literal->value.get(); subcolumn_name = type_tuple.getNameByPosition(index); } else if (value_type == Field::Types::String) + { subcolumn_name = literal->value.get(); + } else + { return; + } - ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } else { auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end()) - { - const auto & [type_id, subcolumn_name, transformer] = it->second; - if (column_type_id == type_id) - { - ast = transformer(name_in_storage, subcolumn_name, arguments[1]); - ast->setAlias(alias); - } - } + if (it == binary_function_to_subcolumn.end()) + return; + + const auto & [expected_type_id, subcolumn_name, transformer] = it->second; + if (column_type_id != expected_type_id) + return; + + ast = transformer(column->name, subcolumn_name, arguments[1]); + ast->setAlias(alias); } } } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h index 3c945da92ec..08eb6e27c52 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h @@ -7,20 +7,46 @@ namespace DB { class ASTFunction; +class ASTIdentifier; + +/// Collects info about identifiers to select columns to optimize to subcolumns. 
+class RewriteFunctionToSubcolumnFirstPassMatcher +{ +public: + struct Data + { + explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {} + + StorageMetadataPtr metadata_snapshot; + std::unordered_map indentifiers_count; + std::unordered_map optimized_identifiers_count; + }; + + static void visit(const ASTPtr & ast, Data & data); + static void visit(const ASTFunction & function, Data & data); + static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; } +}; + +using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor; /// Rewrites functions to subcolumns, if possible, to reduce amount of read data. /// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnData +class RewriteFunctionToSubcolumnSecondPassData { public: using TypeToVisit = ASTFunction; void visit(ASTFunction & function, ASTPtr & ast) const; + RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_) + : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) + { + } + StorageMetadataPtr metadata_snapshot; - IdentifierNameSet forbidden_identifiers; + NameSet identifiers_to_optimize; }; -using RewriteFunctionToSubcolumnMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnVisitor = InDepthNodeVisitor; +using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher; +using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 3df6242fa85..07cfe897010 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -659,35 +659,55 @@ void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & re return; const auto & metadata_snapshot = result.storage_snapshot->metadata; - // const auto & select_query = assert_cast(*query); + const 
auto & select_query = assert_cast(*query); /// For queries with FINAL converting function to subcolumn may alter /// special merging algorithms and produce wrong result of query. - // if (select_query.final()) - // return; + if (select_query.final()) + return; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor::Data data; - // FindIdentifiersForbiddenToReplaceToSubcolumnsVisitor(data).visit(query); + NameSet all_key_columns; - IdentifierNameSet forbidden_identifiers; - - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); - forbidden_identifiers.insert(primary_key_columns.begin(), primary_key_columns.end()); + all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end()); const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - forbidden_identifiers.insert(partition_key_columns.begin(), partition_key_columns.end()); + all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end()); for (const auto & index : metadata_snapshot->getSecondaryIndices()) { const auto & index_columns = index.expression->getRequiredColumns(); - forbidden_identifiers.insert(index_columns.begin(), index_columns.end()); + all_key_columns.insert(index_columns.begin(), index_columns.end()); } - RewriteFunctionToSubcolumnVisitor::Data rewrite_data{metadata_snapshot, forbidden_identifiers}; - RewriteFunctionToSubcolumnVisitor(rewrite_data).visit(query); + /// Do not optimize if full column is requested in other context. + /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. 
query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); + RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); + + NameSet identifiers_to_optimize; + for (const auto & [identifier, count] : data.optimized_identifiers_count) + if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) + identifiers_to_optimize.insert(identifier); + + if (identifiers_to_optimize.empty()) + return; + + RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); + RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); } void optimizeOrLikeChain(ASTPtr & query) diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index f723284ad61..151ff275f7b 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -12,9 +12,9 @@ SELECT * FROM t_tuple_numeric FORMAT JSONEachRow; SELECT `t`.`1`.`2`, `t`.`1`.`3`, `t`.`4` FROM t_tuple_numeric; SELECT t.1.1, t.1.2, t.2 FROM t_tuple_numeric; -SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT t.4 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK} -SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER} +SELECT t.1.3 FROM t_tuple_numeric; -- {serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT t.4 FROM t_tuple_numeric; -- 
{serverError NOT_FOUND_COLUMN_IN_BLOCK, ARGUMENT_OUT_OF_BOUND} +SELECT `t`.`1`.`1`, `t`.`1`.`2`, `t`.`2` FROM t_tuple_numeric; -- {serverError UNKNOWN_IDENTIFIER, ARGUMENT_OUT_OF_BOUND} DROP TABLE t_tuple_numeric; From 93c362a803ff924f4176f6cd8483c08d731ccb59 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 12 Jan 2024 12:06:38 +0000 Subject: [PATCH 005/273] return and fix test --- .../01600_parts_states_metrics_long.reference | 4 ++ .../01600_parts_states_metrics_long.sh | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/queries/0_stateless/01600_parts_states_metrics_long.reference create mode 100755 tests/queries/0_stateless/01600_parts_states_metrics_long.sh diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.reference b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh new file mode 100755 index 00000000000..2e47034e528 --- /dev/null +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# NOTE: database = $CLICKHOUSE_DATABASE is unwanted +verify_sql="SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" + +# The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. 
+# So, there is inherent race condition. But it should get expected result eventually. +# In case of test failure, this code will do infinite loop and timeout. +verify() +{ + while true + do + result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) + [ "$result" = "1" ] && break + sleep 0.1 + done + echo 1 +} + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +verify + +$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +verify + +$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +verify + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +verify From 8a0126204272fdcecd722595a2d7e64496ba7c94 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 15 Jan 2024 18:17:36 +0000 Subject: [PATCH 006/273] fix tests --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 9 ++++++--- tests/queries/0_stateless/02116_tuple_element.sql | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 82d50f5fdb1..c5d34b5462a 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -64,6 +64,9 @@ public: void enterImpl(const QueryTreeNodePtr & node) { + if (!getSettings().optimize_functions_to_subcolumns) + return; + if (data.has_final) return; @@ -202,6 +205,9 @@ public: void enterImpl(QueryTreeNodePtr & node) const { + if (!getSettings().optimize_functions_to_subcolumns) + return; + auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node); if (!function_node || !first_argument_column_node || 
!table_node) return; @@ -358,9 +364,6 @@ private: void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - if (!context->getSettingsRef().optimize_functions_to_subcolumns) - return; - FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); auto data = first_visitor.getData(); diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 97f6c049705..ece7114e763 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -17,8 +17,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT t2.1 FROM t_tuple_element; @@ -29,8 +29,8 @@ EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, 
NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ILLEGAL_INDEX, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } DROP TABLE t_tuple_element; From e6ad9dd387cc004096eee3c2bfbadf6689473203 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Jan 2024 17:21:15 +0000 Subject: [PATCH 007/273] fix crash with optimize_functions_to_subcolumns --- src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 2a235ae31e4..eaf27a7ae80 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -81,6 +81,9 @@ std::optional getColumnFromArgumentsToOptimize( return {}; const auto & column_type = columns.get(name_in_storage).type; + if (column_type->hasDynamicSubcolumns()) + return {}; + return NameAndTypePair{name_in_storage, column_type}; } From 0c9926a7045d6200ad2e486f3ee3532c1cbbca16 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Jan 2024 15:42:31 +0000 Subject: [PATCH 008/273] fixes for optimize_functions_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 28 +++++++++++++++---- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Storages/HDFS/StorageHDFS.h | 1 + src/Storages/HDFS/StorageHDFSCluster.h | 2 -- src/Storages/IStorage.h | 2 ++ src/Storages/IStorageCluster.h | 4 ++- src/Storages/S3Queue/StorageS3Queue.h | 1 + src/Storages/StorageAzureBlob.h | 2 ++ src/Storages/StorageAzureBlobCluster.h | 2 -- src/Storages/StorageFile.h | 1 + 
src/Storages/StorageFileCluster.h | 2 -- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Cluster.h | 2 -- src/Storages/StorageURL.h | 1 + src/Storages/StorageURLCluster.h | 2 -- 15 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 7b5f3a433ad..9aa785d5918 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -35,17 +35,29 @@ std::tuple getTypedNodesForOptimizati return {}; auto * first_argument_column_node = function_arguments_nodes.front()->as(); - if (!first_argument_column_node) + if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set") return {}; auto column_source = first_argument_column_node->getColumnSource(); auto * table_node = column_source->as(); - if (!table_node || !table_node->getStorage()->supportsSubcolumns()) + if (!table_node) + return {}; + + const auto & storage = table_node->getStorage(); + const auto & storage_snapshot = table_node->getStorageSnapshot(); + auto column = first_argument_column_node->getColumn(); + + if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata)) + return {}; + + auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name); + if (!column_in_table || !column_in_table->type->equals(*column.type)) return {}; return std::make_tuple(function_node, first_argument_column_node, table_node); } +/// First pass collects info about identifiers to determine which identifiers are allowed to optimize. 
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext { public: @@ -132,6 +144,9 @@ private: void enterImpl(const ColumnNode & column_node) { + if (column_node.getColumnName() == "__grouping_set") + return; + auto column_source = column_node.getColumnSource(); auto * table_node = column_source->as(); if (!table_node) @@ -191,7 +206,7 @@ private: } }; - +/// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { public: @@ -222,9 +237,6 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - if (first_argument_column_node->getColumnName() == "__grouping_set") - return; - auto column_source = first_argument_column_node->getColumnSource(); WhichDataType column_type(column.type); @@ -236,6 +248,8 @@ public: { /// Replace `length(array_argument)` with `array_argument.size0` column.name += ".size0"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "empty") @@ -269,6 +283,8 @@ public: { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; + column.type = std::make_shared(); + node = std::make_shared(column, column_source); } else if (function_name == "isNotNull") diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 07cfe897010..8fab032aece 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -655,7 +655,7 @@ void transformIfStringsIntoEnum(ASTPtr & query) void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) { - if (!result.storage || !result.storage->supportsSubcolumns() || !result.storage_snapshot) + if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot) return; const auto & metadata_snapshot = result.storage_snapshot->metadata; diff --git 
a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e0..bd36556c017 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -81,6 +81,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 7c4c41a573a..f35a912129c 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,8 +37,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..62faedd19ba 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -167,6 +167,8 @@ public: /// Returns true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if storage supports optimizations of functions by reading subcolumns. + virtual bool supportsOptimizationToSubcolumns() const { return supportsSubcolumns(); } /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. /// Storage may throw an exception later if some query kind is not fully supported. 
diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103d..0e466976852 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -38,7 +38,9 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override { return true; } + bool isRemote() const override final { return true; } + bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } protected: virtual void updateBeforeRead(const ContextPtr &) {} diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 3d3594dc2ab..0b50913546e 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -86,6 +86,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb6..4d54f1cdcc3 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -101,6 +101,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsTrivialCountOptimization() const override { return true; } diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 2831b94f825..c95e329803c 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -36,8 +36,6 @@ 
public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6..db7d8be15cf 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -93,6 +93,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index a6e57c3bb4f..cb00e8870e8 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -33,8 +33,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb..a027f96aa0a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -386,6 +386,7 @@ private: ContextPtr ctx); bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } bool supportsSubsetOfColumns(const ContextPtr & context) const; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index c526f14834a..81169f79746 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - 
bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } protected: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 07d4d0cad38..f16f2757611 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -281,6 +281,7 @@ public: } bool supportsSubcolumns() const override { return true; } + bool supportsOptimizationToSubcolumns() const override { return false; } static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 07978040029..a555df3cd43 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,8 +36,6 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } private: From a89956bb0f614d81d3db7998ea026a0a01db8cd4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 22 Jan 2024 19:33:34 +0000 Subject: [PATCH 009/273] more cases for optimize_functions_to_subcolumns --- ...egateFunctionsArithmericOperationsPass.cpp | 27 +++----- .../Passes/ComparisonTupleEliminationPass.cpp | 13 ++-- src/Analyzer/Passes/CountDistinctPass.cpp | 7 +-- .../Passes/FunctionToSubcolumnsPass.cpp | 58 +++++++++++------ .../Passes/NormalizeCountVariantsPass.cpp | 13 +--- ...ateOrDateTimeConverterWithPreimagePass.cpp | 25 +++----- .../RewriteAggregateFunctionWithIfPass.cpp | 25 +++----- .../RewriteSumFunctionWithSumAndCountPass.cpp | 31 ++-------- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 16 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 11 +--- src/Analyzer/Passes/UniqToCountPass.cpp | 6 +- src/Analyzer/Utils.cpp | 22 +++++++ src/Analyzer/Utils.h | 8 +++ src/Interpreters/InterpreterExplainQuery.cpp | 
26 +++++--- .../RewriteFunctionToSubcolumnVisitor.cpp | 62 +++++++------------ .../01872_functions_to_subcolumns.reference | 18 +++--- .../01872_functions_to_subcolumns.sql | 1 - ...functions_to_subcolumns_analyzer.reference | 50 +++++++++++++++ ...01872_functions_to_subcolumns_analyzer.sql | 42 +++++++++++++ .../0_stateless/02115_map_contains.reference | 2 +- .../02115_map_contains_analyzer.reference | 4 ++ .../02115_map_contains_analyzer.sql | 13 ++++ .../0_stateless/02116_tuple_element.reference | 10 +-- .../02116_tuple_element_analyzer.reference | 25 ++++++++ .../02116_tuple_element_analyzer.sql | 43 +++++++++++++ ...tions_to_subcolumns_column_names.reference | 14 +++++ ...1_functions_to_subcolumns_column_names.sql | 19 ++++++ ...2971_functions_to_subcolumns_map.reference | 8 +++ .../02971_functions_to_subcolumns_map.sql | 19 ++++++ 29 files changed, 411 insertions(+), 207 deletions(-) create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference create mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.reference create mode 100644 tests/queries/0_stateless/02115_map_contains_analyzer.sql create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.reference create mode 100644 tests/queries/0_stateless/02116_tuple_element_analyzer.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index baecb372c2d..b8a477b8523 100644 --- 
a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -165,31 +166,17 @@ private: auto aggregate_function_clone = aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); + + resolveAggregateFunctionNodeByName( + aggregate_function_clone_typed, + result_aggregate_function_name, + {arithmetic_function_clone_argument->getResultType()}); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); - resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); + resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); return arithmetic_function_clone; } - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) - { - auto function_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get( - aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties); - - 
function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 7c38ba81c70..42b53f667b4 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -171,13 +172,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); result_function = std::move(not_function); } @@ -197,17 +198,11 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); return comparison_function; } - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, context); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - ContextPtr context; }; diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp 
b/src/Analyzer/Passes/CountDistinctPass.cpp index 07a031fe4e8..a73ca4befcf 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -77,11 +78,9 @@ public: /// Replace `countDistinct` of initial query into `count` auto result_type = function_node->getResultType(); - AggregateFunctionProperties properties; - auto action = NullsAction::EMPTY; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9aa785d5918..ac13a505a52 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -178,12 +179,12 @@ private: } else if (column_type.isNullable()) { - if (function_name == "isNull" || function_name == "isNotNull") + if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") ++data.optimized_identifiers_count[qualified_name]; } else if (column_type.isMap()) { - if (function_name == "mapKeys" || function_name == "mapValues") + if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") ++data.optimized_identifiers_count[qualified_name]; } } @@ -192,10 +193,10 @@ private: const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) { - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = 
tuple_element_constant_value.getType(); + const auto & constant_value = second_argument_constant_node->getValue(); + const auto & constant_value_type = constant_value.getType(); - if (tuple_element_constant_value_type == Field::Types::String || tuple_element_constant_value_type == Field::Types::UInt64) + if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) ++data.optimized_identifiers_count[qualified_name]; } else if (function_name == "mapContains" && column_type.isMap()) @@ -209,6 +210,9 @@ private: /// Second pass optimizes functions to subcolumns for allowed identifiers. class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext { +private: + std::unordered_set identifiers_to_optimize; + public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; @@ -262,7 +266,7 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "equals"); + resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); } else if (function_name == "notEmpty") { @@ -274,12 +278,27 @@ public: function_arguments_nodes.push_back(std::make_shared(column, column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - resolveOrdinaryFunctionNode(*function_node, "notEquals"); + resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); } } else if (column_type.isNullable()) { - if (function_name == "isNull") + if (function_name == "count") + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + column.name += ".null"; + column.type = std::make_shared(); + + auto column_node = std::make_shared(column, column_source); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(column_node)); + 
resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + } + else if (function_name == "isNull") { /// Replace `isNull(nullable_argument)` with `nullable_argument.null` column.name += ".null"; @@ -295,12 +314,20 @@ public: function_arguments_nodes = {std::make_shared(column, column_source)}; - resolveOrdinaryFunctionNode(*function_node, "not"); + resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); } } else if (column_type.isMap()) { - if (function_name == "mapKeys") + if (function_name == "length") + { + /// Replace `length(map_argument)` with `map_argument.size0` + column.name += ".size0"; + column.type = std::make_shared(); + + node = std::make_shared(column, column_source); + } + else if (function_name == "mapKeys") { /// Replace `mapKeys(map_argument)` with `map_argument.keys` column.name += ".keys"; @@ -364,19 +391,10 @@ public: auto has_function_argument = std::make_shared(column, column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNode(*function_node, "has"); + resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); } } } - -private: - std::unordered_set identifiers_to_optimize; - - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6b801925a6e..6d9e6765608 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -41,25 
+42,17 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { - resolveAsCountAggregateFunction(*function_node); function_node->getArguments().getNodes().clear(); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } -private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } }; } diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index 9b9ceacdd4c..cc6fe95101d 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -140,16 +141,16 @@ private: const auto lhs = std::make_shared("greaterOrEquals"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = std::make_shared("less"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); 
rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("and"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -158,16 +159,16 @@ private: const auto lhs = std::make_shared("less"); lhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); lhs->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*lhs, lhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*lhs, lhs->getFunctionName(), getContext()); const auto rhs = std::make_shared("greaterOrEquals"); rhs->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); rhs->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_date_filter = std::make_shared("or"); new_date_filter->getArguments().getNodes() = {lhs, rhs}; - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -176,7 +177,7 @@ private: const auto new_date_filter = std::make_shared("greaterOrEquals"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); 
new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -185,7 +186,7 @@ private: const auto new_date_filter = std::make_shared("less"); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(end_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -194,7 +195,7 @@ private: const auto new_date_filter = std::make_shared(comparator); new_date_filter->getArguments().getNodes().push_back(std::make_shared(column_node.getColumn(), column_node.getColumnSource())); new_date_filter->getArguments().getNodes().push_back(std::make_shared(start_date_or_date_time)); - resolveOrdinaryFunctionNode(*new_date_filter, new_date_filter->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*new_date_filter, new_date_filter->getFunctionName(), getContext()); return new_date_filter; } @@ -205,12 +206,6 @@ private: comparator); } } - - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } }; } diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 9c89670f3c6..b8962e5a4c1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ 
-13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -58,8 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } else if (first_const_node) @@ -79,30 +79,21 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); + resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) { auto result_type = function_node.getResultType(); + auto suffix = result_type->isNullable() ? 
"OrNullIf" : "If"; - std::string suffix = "If"; - if (result_type->isNullable()) - suffix = "OrNullIf"; - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + resolveAggregateFunctionNodeByName( + function_node, function_node.getFunctionName() + suffix, - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); + {arguments[0]->getResultType(), arguments[1]->getResultType()}); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 7887a1b7175..2f6674946a3 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -77,50 +78,30 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*lhs, column_type); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAsAggregateFunctionNode(*rhs_count, column_type); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(rhs_count); - resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext()); const auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); if (column_id == 0) 
new_node->getArguments().getNodes() = {lhs, rhs}; else if (column_id == 1) new_node->getArguments().getNodes() = {rhs, lhs}; - resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext()); if (!new_node) return; node = new_node; - } - -private: - void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const - { - const auto function = FunctionFactory::instance().get(function_name, getContext()); - function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); - } - - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), - NullsAction::EMPTY, - {argument_type}, - {}, - properties); - - function_node.resolveAsAggregateFunction(aggregate_function); - } - }; } diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index c6b1c6eb851..78d5479843e 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -65,7 +66,8 @@ public: auto multiplier_node = function_node_arguments_nodes[0]; function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (constant_value_literal.get() != 1) { @@ -113,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, 
function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_true_condition_value != 1) { @@ -142,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); + resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); if (if_false_condition_value != 1) { @@ -154,14 +156,6 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); - } inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index a8382930506..610128a5754 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB @@ -75,15 +76,7 @@ public: for (const auto & function_node_argument : function_node_argument_nodes) argument_types.emplace_back(function_node_argument->getResultType()); - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node->getFunctionName(), - NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), - properties); - - 
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); } }; diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 11ebc45a369..d5e4e011cfa 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -175,11 +176,8 @@ public: /// Replace uniq of initial query to count if (match_subquery_with_distinct() || match_subquery_with_group_by()) { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); - function_node->getArguments().getNodes().clear(); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + resolveAggregateFunctionNodeByName(*function_node, "count", {}); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 53fcf534f64..c193619a35f 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -685,4 +685,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context) +{ + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); +} + +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +{ + chassert(function_node.isAggregateFunction()); + auto old_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get( + function_name, + function_node.getNullsAction(), + 
argument_types, + old_aggregate_function->getParameters(), + properties); + + function_node.resolveAsAggregateFunction(std::move(aggregate_function)); +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index d3eb6ba3cc2..60f32d6b267 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -102,4 +102,12 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Resolves function node as ordinary function with given name. +/// Arguments and parameters are taken from the node. +void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context); + +/// Resolves function node as aggregate function with given name. +/// Arguments and parameters are taken from the node. +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); + } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..b99506e948e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -43,6 +43,7 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace @@ -170,6 +171,7 @@ struct QueryASTSettings struct QueryTreeSettings { bool run_passes = true; + bool dump_tree = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; @@ -179,6 +181,7 @@ struct QueryTreeSettings std::unordered_map> boolean_settings = { {"run_passes", run_passes}, + {"dump_tree", dump_tree}, {"dump_passes", dump_passes}, {"dump_ast", dump_ast} }; @@ -398,7 +401,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT 
is supported for EXPLAIN QUERY TREE query"); auto settings = checkAndGetSettings(ast.getSettings()); + if (!settings.dump_tree && !settings.dump_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'dump_tree' or 'dump_ast' must be set for EXPLAIN QUERY TREE query"); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + bool need_newline = false; if (settings.run_passes) { @@ -410,23 +417,26 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (settings.dump_passes) { query_tree_pass_manager.dump(buf, pass_index); - if (pass_index > 0) - buf << '\n'; + need_newline = true; } query_tree_pass_manager.run(query_tree, pass_index); + } + + if (settings.dump_tree) + { + if (need_newline) + buf << "\n\n"; query_tree->dumpTree(buf); - } - else - { - query_tree->dumpTree(buf); + need_newline = true; } if (settings.dump_ast) { - buf << '\n'; - buf << '\n'; + if (need_newline) + buf << "\n\n"; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); } diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index eaf27a7ae80..3167c2d37dc 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -40,27 +40,16 @@ ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const S return makeASTFunction("sum", makeASTFunction("not", ast)); } -ASTPtr transformMapContainsToSubcolumn(const String & name_in_storage, const String & subcolumn_name, const ASTPtr & arg) +const std::unordered_map, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn = { - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("has", ast, arg); -} - -const std::unordered_map> unary_function_to_subcolumn = -{ - {"length", {TypeIndex::Array, "size0", transformToSubcolumn}}, - {"empty", {TypeIndex::Array, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", 
{TypeIndex::Array, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {TypeIndex::Nullable, "null", transformToSubcolumn}}, - {"isNotNull", {TypeIndex::Nullable, "null", transformIsNotNullToSubcolumn}}, - {"count", {TypeIndex::Nullable, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {TypeIndex::Map, "keys", transformToSubcolumn}}, - {"mapValues", {TypeIndex::Map, "values", transformToSubcolumn}}, -}; - -const std::unordered_map> binary_function_to_subcolumn -{ - {"mapContains", {TypeIndex::Map, "keys", transformMapContainsToSubcolumn}}, + {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}}, + {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}}, + {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}}, + {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}}, + {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}}, + {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}}, + {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}}, + {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}}, }; std::optional getColumnFromArgumentsToOptimize( @@ -116,10 +105,14 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (arguments.size() == 1) { auto it = unary_function_to_subcolumn.find(function.name); - if (it != unary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) + if (it == unary_function_to_subcolumn.end()) + return; + + const auto & expected_types_id = std::get<0>(it->second); + if (expected_types_id.contains(column_type_id)) ++data.optimized_identifiers_count[column->name]; } - else + else if (arguments.size() == 2) { if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) { @@ -131,11 +124,9 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == 
Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it != binary_function_to_subcolumn.end() && std::get<0>(it->second) == column_type_id) - ++data.optimized_identifiers_count[column->name]; + ++data.optimized_identifiers_count[column->name]; } } } @@ -148,7 +139,7 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST return; auto column_type_id = column->type->getTypeId(); - const auto & alias = function.tryGetAlias(); + auto alias = function.getAliasOrColumnName(); if (arguments.size() == 1) { @@ -156,8 +147,8 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST if (it == unary_function_to_subcolumn.end()) return; - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) + const auto & [expected_types_id, subcolumn_name, transformer] = it->second; + if (!expected_types_id.contains(column_type_id)) return; ast = transformer(column->name, subcolumn_name); @@ -191,17 +182,10 @@ void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, AST ast = transformToSubcolumn(column->name, subcolumn_name); ast->setAlias(alias); } - else + else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { - auto it = binary_function_to_subcolumn.find(function.name); - if (it == binary_function_to_subcolumn.end()) - return; - - const auto & [expected_type_id, subcolumn_name, transformer] = it->second; - if (column_type_id != expected_type_id) - return; - - ast = transformer(column->name, subcolumn_name, arguments[1]); + auto subcolumn = transformToSubcolumn(column->name, "keys"); + ast = makeASTFunction("has", subcolumn, arguments[1]); ast->setAlias(alias); } } diff --git 
a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference index a1cd31e2dc9..8c4017d6030 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference @@ -2,25 +2,25 @@ 0 1 0 SELECT id IS NULL, - `n.null`, - NOT `n.null` + `n.null` AS `isNull(n)`, + NOT `n.null` AS `isNotNull(n)` FROM t_func_to_subcolumns 3 0 1 0 0 1 0 \N SELECT - `arr.size0`, - `arr.size0` = 0, - `arr.size0` != 0, + `arr.size0` AS `length(arr)`, + `arr.size0` = 0 AS `empty(arr)`, + `arr.size0` != 0 AS `notEmpty(arr)`, empty(n) FROM t_func_to_subcolumns ['foo','bar'] [1,2] [] [] SELECT - `m.keys`, - `m.values` + `m.keys` AS `mapKeys(m)`, + `m.values` AS `mapValues(m)` FROM t_func_to_subcolumns 1 -SELECT sum(NOT `n.null`) +SELECT sum(NOT `n.null`) AS `count(n)` FROM t_func_to_subcolumns 2 SELECT count(id) @@ -30,7 +30,7 @@ FROM t_func_to_subcolumns 3 0 0 SELECT id, - `n.null`, + `n.null` AS `isNull(n)`, right.n IS NULL FROM t_func_to_subcolumns AS left ALL FULL OUTER JOIN diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql index eb0165f4e13..45f83bf20e5 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql @@ -1,6 +1,5 @@ DROP TABLE IF EXISTS t_func_to_subcolumns; -SET allow_experimental_map_type = 1; SET optimize_functions_to_subcolumns = 1; CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference new file mode 100644 index 00000000000..ce5e46fa271 --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -0,0 +1,50 @@ 
+0 0 1 +0 1 0 +SELECT + __table1.id IS NULL AS `isNull(id)`, + __table1.`n.null` AS `isNull(n)`, + NOT __table1.`n.null` AS `isNotNull(n)` +FROM default.t_func_to_subcolumns AS __table1 +3 0 1 0 +0 1 0 \N +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`arr.size0` = 0 AS `empty(arr)`, + __table1.`arr.size0` != 0 AS `notEmpty(arr)`, + empty(__table1.n) AS `empty(n)` +FROM default.t_func_to_subcolumns AS __table1 +['foo','bar'] [1,2] +[] [] +SELECT + __table1.`m.keys` AS `mapKeys(m)`, + __table1.`m.values` AS `mapValues(m)` +FROM default.t_func_to_subcolumns AS __table1 +1 +SELECT sum(NOT __table1.`n.null`) AS `count(n)` +FROM default.t_func_to_subcolumns AS __table1 +2 +SELECT count(__table1.id) AS `count(id)` +FROM default.t_func_to_subcolumns AS __table1 +1 0 0 +2 1 0 +3 0 0 +SELECT + __table1.id AS id, + __table1.`n.null` AS `isNull(n)`, + __table2.n IS NULL AS `isNull(right.n)` +FROM default.t_func_to_subcolumns AS __table1 +ALL FULL OUTER JOIN +( + + SELECT + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + UNION ALL + SELECT + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 +) AS __table2 USING (id) +0 10 +0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql new file mode 100644 index 00000000000..c1ab6909e2f --- /dev/null +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns; + +SET allow_experimental_analyzer = 1; +SET optimize_functions_to_subcolumns = 1; + +CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id 
IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; + +SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; + +SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; + +SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; + +SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; + +SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); + +DROP TABLE t_func_to_subcolumns; + +DROP TABLE IF EXISTS t_tuple_null; + +CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_tuple_null VALUES ((10)), ((20)); + +SELECT t IS NULL, t.null FROM t_tuple_null; + +DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference index 975e9876237..e4ae4f951ba 100644 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ b/tests/queries/0_stateless/02115_map_contains.reference @@ -1,4 +1,4 @@ -SELECT has(`m.keys`, \'a\') +SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` FROM t_map_contains 1 0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.reference b/tests/queries/0_stateless/02115_map_contains_analyzer.reference new file mode 100644 index 
00000000000..7da5243e727 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.reference @@ -0,0 +1,4 @@ +SELECT has(__table1.`m.keys`, \'a\') AS `mapContains(m, \'a\')` +FROM default.t_map_contains AS __table1 +1 +0 diff --git a/tests/queries/0_stateless/02115_map_contains_analyzer.sql b/tests/queries/0_stateless/02115_map_contains_analyzer.sql new file mode 100644 index 00000000000..46e02eca4f0 --- /dev/null +++ b/tests/queries/0_stateless/02115_map_contains_analyzer.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_map_contains; + +CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; + +INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapContains(m, 'a') FROM t_map_contains; +SELECT mapContains(m, 'a') FROM t_map_contains; + +DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference index 121b08d02f1..a8004f5e74c 100644 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ b/tests/queries/0_stateless/02116_tuple_element.reference @@ -1,17 +1,17 @@ 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, 1)` FROM t_tuple_element a -SELECT `t1.s` +SELECT `t1.s` AS `tupleElement(t1, 2)` FROM t_tuple_element 1 -SELECT `t1.a` +SELECT `t1.a` AS `tupleElement(t1, \'a\')` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 2 -SELECT `t2.1` +SELECT `t2.1` AS `tupleElement(t2, 1)` FROM t_tuple_element 1 2 WITH (1, 2) AS t diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference new file mode 100644 index 00000000000..d30f3a6cc58 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -0,0 +1,25 @@ +1 +SELECT 
__table1.`t1.a` AS `tupleElement(t1, 1)` +FROM default.t_tuple_element AS __table1 +a +SELECT __table1.`t1.s` AS `tupleElement(t1, 2)` +FROM default.t_tuple_element AS __table1 +1 +SELECT __table1.`t1.a` AS `tupleElement(t1, \'a\')` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +2 +SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` +FROM default.t_tuple_element AS __table1 +1 2 +SELECT + 1 AS `tupleElement(t, 1)`, + 2 AS `tupleElement(t, 2)` +FROM system.one AS __table1 +1 2 +SELECT + _CAST(1, \'UInt32\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt32\') AS `tupleElement(t, \'b\')` +FROM system.one AS __table1 diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.sql b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql new file mode 100644 index 00000000000..5aeb72c9ee4 --- /dev/null +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_tuple_element; + +CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +SELECT t1.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t1.1 FROM t_tuple_element; + +SELECT tupleElement(t1, 2) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 2) FROM t_tuple_element; + +SELECT tupleElement(t1, 'a') FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t1, 'a') FROM t_tuple_element; + +SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER 
} +SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT t2.1 FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT t2.1 FROM t_tuple_element; + +SELECT tupleElement(t2, 1) FROM t_tuple_element; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT tupleElement(t2, 1) FROM t_tuple_element; + +SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } +SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +DROP TABLE t_tuple_element; + +WITH (1, 2) AS t SELECT t.1, t.2; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2) AS t SELECT t.1, t.2; + +WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference new file mode 100644 index 00000000000..4787c660c68 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -0,0 +1,14 @@ +SELECT + `arr.size0` AS `length(arr)`, + `n.null` AS `isNull(n)` +FROM t_column_names +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 
│ +└─────────────┴───────────┘ +SELECT + __table1.`arr.size0` AS `length(arr)`, + __table1.`n.null` AS `isNull(n)` +FROM default.t_column_names AS __table1 +┌─length(arr)─┬─isNull(n)─┐ +│ 3 │ 0 │ +└─────────────┴───────────┘ diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql new file mode 100644 index 00000000000..89c39046df3 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_column_names; + +CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Memory; + +INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; + +DROP TABLE t_column_names; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference new file mode 100644 index 00000000000..90596ce1000 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -0,0 +1,8 @@ +SELECT `m.size0` AS `length(m)` +FROM t_func_to_subcolumns_map +2 +1 +SELECT __table1.`m.size0` AS `length(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +2 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql new file mode 100644 index 00000000000..b5687696b43 --- /dev/null +++ 
b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_map; + +CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; +SELECT length(m) FROM t_func_to_subcolumns_map; + +DROP TABLE t_func_to_subcolumns_map; From 368c99f1827acabd32a1ec11dba8711c627cdd53 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 23 Jan 2024 02:46:18 +0000 Subject: [PATCH 010/273] fix crash with analyzer --- ...egateFunctionsArithmericOperationsPass.cpp | 7 ++--- src/Analyzer/Passes/CountDistinctPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 4 +-- .../RewriteAggregateFunctionWithIfPass.cpp | 12 +++------ .../RewriteSumFunctionWithSumAndCountPass.cpp | 4 +-- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 ++--- .../UniqInjectiveFunctionsEliminationPass.cpp | 10 +------ src/Analyzer/Passes/UniqToCountPass.cpp | 2 +- src/Analyzer/Utils.cpp | 26 ++++++------------- src/Analyzer/Utils.h | 2 +- 11 files changed, 26 insertions(+), 51 deletions(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index b8a477b8523..a3d3b0ca13a 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -165,12 +165,9 @@ private: auto aggregate_function_clone = 
aggregate_function->clone(); auto & aggregate_function_clone_typed = aggregate_function_clone->as(); - aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; - resolveAggregateFunctionNodeByName( - aggregate_function_clone_typed, - result_aggregate_function_name, - {arithmetic_function_clone_argument->getResultType()}); + aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument }; + resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name); arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone); resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext()); diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index a73ca4befcf..45d0301a0fe 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -80,7 +80,7 @@ public: auto result_type = function_node->getResultType(); function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } }; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index ac13a505a52..de8b7753700 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -296,7 +296,7 @@ public: resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum", {column.type}); + resolveAggregateFunctionNodeByName(*function_node, "sum"); } else if (function_name == "isNull") { diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp 
b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 6d9e6765608..1810158a2d7 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -43,14 +43,14 @@ public: if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.get() == 1) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index b8962e5a4c1..37eb3d98614 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -59,7 +59,7 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[1]); function_arguments_nodes[1] = std::move(if_arguments_nodes[0]); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } else if (first_const_node) @@ -79,21 +79,17 @@ public: function_arguments_nodes.resize(2); function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf(*function_node, function_arguments_nodes); + resolveAsAggregateFunctionWithIf(*function_node); } } } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const QueryTreeNodes & arguments) + static inline void 
resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; - - resolveAggregateFunctionNodeByName( - function_node, - function_node.getFunctionName() + suffix, - {arguments[0]->getResultType(), arguments[1]->getResultType()}); + resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 2f6674946a3..39f9e3b625b 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -78,11 +78,11 @@ public: const auto lhs = std::make_shared("sum"); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName()); const auto rhs_count = std::make_shared("count"); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); - resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName(), {column_type}); + resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName()); const auto rhs = std::make_shared("multiply"); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 78d5479843e..e072ba5ad48 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -67,7 +67,7 @@ public: function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + 
resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (constant_value_literal.get() != 1) { @@ -115,7 +115,7 @@ public: function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_true_condition_value != 1) { @@ -144,7 +144,7 @@ public: function_node_arguments_nodes[0] = std::move(not_function); function_node_arguments_nodes.resize(1); - resolveAggregateFunctionNodeByName(*function_node, "countIf", {function_node_arguments_nodes[0]->getResultType()}); + resolveAggregateFunctionNodeByName(*function_node, "countIf"); if (if_false_condition_value != 1) { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 610128a5754..1339fc07ac8 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -68,15 +68,7 @@ public: if (!replaced_argument) return; - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); - - DataTypes argument_types; - argument_types.reserve(function_node_argument_nodes.size()); - - for (const auto & function_node_argument : function_node_argument_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); - - resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName(), argument_types); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName()); } }; diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d5e4e011cfa..929c2731e5d 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -177,7 +177,7 @@ public: if 
(match_subquery_with_distinct() || match_subquery_with_group_by()) { function_node->getArguments().getNodes().clear(); - resolveAggregateFunctionNodeByName(*function_node, "count", {}); + resolveAggregateFunctionNodeByName(*function_node, "count"); } } }; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index c193619a35f..efada8ef16a 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -528,16 +528,16 @@ private: bool has_function = false; }; -inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node) +inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name) { Array parameters; - for (const auto & param : function_node->getParameters()) + for (const auto & param : function_node.getParameters()) { auto * constant = param->as(); parameters.push_back(constant->getValue()); } - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + const auto & function_node_argument_nodes = function_node.getArguments().getNodes(); DataTypes argument_types; argument_types.reserve(function_node_argument_nodes.size()); @@ -547,7 +547,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod AggregateFunctionProperties properties; auto action = NullsAction::EMPTY; - return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties); + return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties); } } @@ -628,11 +628,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context) { if (name == "nothing") return; - function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } else if (function_node->isWindowFunction()) { - 
function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node)); + function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName())); } } @@ -691,19 +691,9 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } -void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types) +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name) { - chassert(function_node.isAggregateFunction()); - auto old_aggregate_function = function_node.getAggregateFunction(); - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_name, - function_node.getNullsAction(), - argument_types, - old_aggregate_function->getParameters(), - properties); - + auto aggregate_function = resolveAggregateFunction(function_node, function_name); function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 60f32d6b267..75d874c1736 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -108,6 +108,6 @@ void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const Strin /// Resolves function node as aggregate function with given name. /// Arguments and parameters are taken from the node. 
-void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name, const DataTypes & argument_types); +void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name); } From d09f5d18f16c7e988338531be2432187ce633891 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:14:38 +0100 Subject: [PATCH 011/273] Repro test --- .../02967_prewhere_no_columns.reference | 2 + .../0_stateless/02967_prewhere_no_columns.sql | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.reference create mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.sql diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference new file mode 100644 index 00000000000..df105254618 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.reference @@ -0,0 +1,2 @@ +105 +105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql b/tests/queries/0_stateless/02967_prewhere_no_columns.sql new file mode 100644 index 00000000000..efcc952caa2 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.sql @@ -0,0 +1,51 @@ +CREATE TABLE t_02967 +( + `key` Date, + `value` UInt16 +) +ENGINE = MergeTree +ORDER BY key +SETTINGS + index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 + , index_granularity = 100 + , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 +-- +-- , min_bytes_for_wide_part = 2 +AS SELECT + number, + repeat(toString(number), 5) +FROM numbers(105.); + + + +-- Check with newly inserted data part. Its in-memory structures are filled at insert time.
+SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + + + +-- Reload part from disk to check that in-memory structures were properly serialized-deserialized +DETACH TABLE t_02967; +ATTACH TABLE t_02967; + + +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + +DROP TABLE t_02967; From a11a5e783b0089a943f96ce58fedcf15bb586fc0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:15:46 +0100 Subject: [PATCH 012/273] Adjust last granule after reading row count --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87f23b0da2a..d7221f5a536 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1176,6 +1176,14 @@ void IMergeTreeDataPart::loadRowsCount() auto buf = metadata_manager->read("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); + + if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) + { + /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity.
+ index_granularity.popMark(); + index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); + chassert(rows_count == index_granularity.getTotalRows()); + } }; if (index_granularity.empty()) From 367a874edd25ffe4c9cfc1e2105e9fad899c68bd Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 24 Jan 2024 18:16:15 +0100 Subject: [PATCH 013/273] Adjust last granule when creating part --- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9d373504473..a670807a997 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -760,7 +760,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// We can adjust marks only if we computed granularity for blocks. /// Otherwise we cannot change granularity because it will differ from /// other columns - if (compute_granularity && settings.can_use_adaptive_granularity) +// if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) throw Exception(ErrorCodes::LOGICAL_ERROR, From 502b8239a219bdace74e1de32e337c97024c2bee Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 25 Jan 2024 13:40:02 +0100 Subject: [PATCH 014/273] Allow last mark not to match fixed granularity value --- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a670807a997..bb60e682f1b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -535,7 +535,10 @@ void 
MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - throw Exception( + /// With fixed granularity we can have last mark with less rows than granularity + const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); + if (!data_part->index_granularity_info.fixed_index_granularity || !is_last_mark) + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", From ca2bfdb9aec54bb0bf3c983bd0c62fcf2c48c1dd Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 25 Jan 2024 13:41:49 +0100 Subject: [PATCH 015/273] Update according to last mark smaller size --- tests/queries/1_stateful/00166_explain_estimate.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 71ddd681581..85ecd0b9a71 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8839168 1079 -test hits 1 835584 102 +test hits 1 8832938 1079 +test hits 1 829354 102 test hits 1 8003584 977 test hits 2 581632 71 From 02b349822f10c1026ee1bf1b3b0d4f213864d929 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 29 Jan 2024 19:03:34 +0100 Subject: [PATCH 016/273] Fix for last mark equal to fixed granule size --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d7221f5a536..8ebe39a916e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1182,7 +1182,10 @@ void 
IMergeTreeDataPart::loadRowsCount() /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. index_granularity.popMark(); index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); - chassert(rows_count == index_granularity.getTotalRows()); + if (rows_count != index_granularity.getTotalRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", + name, index_granularity.getTotalRows(), rows_count); } }; From 7d62e224b50d10188b9876684a8a564b5965347c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 5 Feb 2024 15:31:42 +0100 Subject: [PATCH 017/273] Pass correct Context --- .../Passes/ComparisonTupleEliminationPass.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 42b53f667b4..88da37f014b 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -19,19 +19,18 @@ namespace DB namespace { -class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor +class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext { public: - explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) - : context(std::move(context_)) - {} + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION; } - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * function_node = node->as(); if (!function_node) @@ -172,13 +171,13 @@ private: { auto result_function = std::make_shared("and"); result_function->getArguments().getNodes() = 
std::move(tuple_arguments_equals_functions); - resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext()); if (comparison_function_name == "notEquals") { auto not_function = std::make_shared("not"); not_function->getArguments().getNodes().push_back(std::move(result_function)); - resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext()); result_function = std::move(not_function); } @@ -198,12 +197,10 @@ private: comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument)); comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument)); - resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), context); + resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext()); return comparison_function; } - - ContextPtr context; }; } From bac29c0bbafb7a97e033ca08bdc12a9d13914c15 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 15:52:19 +0000 Subject: [PATCH 018/273] add test for variant subcolumn --- .../RewriteFunctionToSubcolumnVisitor.cpp | 6 ++++++ ..._functions_to_subcolumns_variant.reference | 8 +++++++ .../02971_functions_to_subcolumns_variant.sql | 21 +++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference create mode 100644 tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 04451947796..437d46c24b2 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ 
b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -124,6 +124,12 @@ void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & funct if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) ++data.optimized_identifiers_count[column->name]; } + else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) + { + const auto * literal = arguments[1]->as(); + if (literal && literal->value.getType() == Field::Types::String) + ++data.optimized_identifiers_count[column->name]; + } else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) { ++data.optimized_identifiers_count[column->name]; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference new file mode 100644 index 00000000000..7a52155fc2d --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -0,0 +1,8 @@ +SELECT `v.String` AS `variantElement(v, \'String\')` +FROM t_func_to_subcolumns_variant +foo +\N +SELECT __table1.`v.String` AS `variantElement(v, \'String\')` +FROM default.t_func_to_subcolumns_variant AS __table1 +foo +\N diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql new file mode 100644 index 00000000000..1cedd877289 --- /dev/null +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t_func_to_subcolumns_variant; + +SET allow_experimental_variant_type = 1; + +CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 0; + +EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT 
variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; +SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; + +DROP TABLE t_func_to_subcolumns_variant; From 46f6867896acec7ee52a25a1e215e78a1eb9365d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Feb 2024 17:37:02 +0000 Subject: [PATCH 019/273] refactor FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 399 ++++++++---------- ...2971_functions_to_subcolumns_map.reference | 16 + .../02971_functions_to_subcolumns_map.sql | 12 + 3 files changed, 210 insertions(+), 217 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 877b0ef7232..f0392a0d9d4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -25,6 +26,181 @@ namespace DB namespace { +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) +{ + /// Replace `length(argument)` with `argument.size0` + /// `argument` may be Array or Map. + + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +template +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +{ + /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive + /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive + /// `argument` may be Array or Map. 
+ + NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + + auto function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + if (value.getType() == Field::Types::UInt64) + return data_type_tuple.getNameByPosition(value.get()); + + return ""; +} + +String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) +{ + if (value.getType() == Field::Types::String) + return value.get(); + + return ""; +} + +template +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +{ + /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. + /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. + + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + if (function_arguments_nodes.size() != 2) + return; + + const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); + if (!second_argument_constant_node) + return; + + auto column_type = column_node.getColumnType(); + const auto & data_type_concrete = assert_cast(*column_type); + + auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) + return; + + NameAndTypePair column{column_node.getColumnName() + "." 
+ subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); +} + +using NodeToSubcolumnTransformer = std::function; + +std::map, NodeToSubcolumnTransformer> node_transformers = +{ + { + {TypeIndex::Array, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Array, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "length"}, optimizeFunctionLength, + }, + { + {TypeIndex::Map, "empty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty, + }, + { + {TypeIndex::Map, "mapKeys"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapValues"}, + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Map, "mapContains"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + auto column_type = column_node.getColumnType(); + const auto & data_type_map = assert_cast(*column_type); + + NameAndTypePair column{column_node.getColumnName() + ".keys", std::make_shared(data_type_map.getKeyType())}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + 
function_arguments_nodes[0] = std::move(has_function_argument); + + resolveOrdinaryFunctionNodeByName(function_node, "has", context); + }, + }, + { + {TypeIndex::Nullable, "count"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto function_node_not = std::make_shared("not"); + + function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + + function_arguments_nodes = {std::move(function_node_not)}; + resolveAggregateFunctionNodeByName(function_node, "sum"); + }, + }, + { + {TypeIndex::Nullable, "isNull"}, + [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + node = std::make_shared(column, column_node.getColumnSource()); + }, + }, + { + {TypeIndex::Nullable, "isNotNull"}, + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + auto & function_arguments_nodes = function_node.getArguments().getNodes(); + + function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; + resolveOrdinaryFunctionNodeByName(function_node, "not", context); + }, + }, + { + {TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement, + }, + { + {TypeIndex::Variant, 
"variantElement"}, optimizeTupleOrVariantElement, + }, +}; + std::tuple getTypedNodesForOptimization(const QueryTreeNodePtr & node) { auto * function_node = node->as(); @@ -161,54 +337,13 @@ private: void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { - const auto & function_arguments_nodes = function_node.getArguments().getNodes(); - const auto & function_name = function_node.getFunctionName(); - auto column = first_argument_column_node.getColumn(); - WhichDataType column_type(column.type); - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); + Identifier qualified_name({table_name, column.name}); - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length" || function_name == "empty" || function_name == "notEmpty") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isNullable()) - { - if (function_name == "count" || function_name == "isNull" || function_name == "isNotNull") - ++data.optimized_identifiers_count[qualified_name]; - } - else if (column_type.isMap()) - { - if (function_name == "length" || function_name == "mapKeys" || function_name == "mapValues") - ++data.optimized_identifiers_count[qualified_name]; - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - const auto & constant_value = second_argument_constant_node->getValue(); - const auto & constant_value_type = constant_value.getType(); - - if (constant_value_type == Field::Types::String || constant_value_type == Field::Types::UInt64) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "variantElement" && column_type.isVariant() && second_argument_constant_node) - { - if 
(second_argument_constant_node->getValue().getType() == Field::Types::String) - ++data.optimized_identifiers_count[qualified_name]; - } - else if (function_name == "mapContains" && column_type.isMap()) - { - ++data.optimized_identifiers_count[qualified_name]; - } - } + if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) + ++data.optimized_identifiers_count[qualified_name]; } }; @@ -236,9 +371,6 @@ public: if (!function_node || !first_argument_column_node || !table_node) return; - auto & function_arguments_nodes = function_node->getArguments().getNodes(); - const auto & function_name = function_node->getFunctionName(); - auto column = first_argument_column_node->getColumn(); auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); @@ -246,176 +378,9 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; - auto column_source = first_argument_column_node->getColumnSource(); - WhichDataType column_type(column.type); - - if (function_arguments_nodes.size() == 1) - { - if (column_type.isArray()) - { - if (function_name == "length") - { - /// Replace `length(array_argument)` with `array_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "empty") - { - /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "equals", getContext()); - } - else if (function_name == "notEmpty") - { - /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` - column.name += ".size0"; - column.type = std::make_shared(); - - function_arguments_nodes.clear(); - 
function_arguments_nodes.push_back(std::make_shared(column, column_source)); - function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - - resolveOrdinaryFunctionNodeByName(*function_node, "notEquals", getContext()); - } - } - else if (column_type.isNullable()) - { - if (function_name == "count") - { - /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - column.name += ".null"; - column.type = std::make_shared(); - - auto column_node = std::make_shared(column, column_source); - auto function_node_not = std::make_shared("not"); - - function_node_not->getArguments().getNodes().push_back(std::move(column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", getContext()); - - function_arguments_nodes = {std::move(function_node_not)}; - resolveAggregateFunctionNodeByName(*function_node, "sum"); - } - else if (function_name == "isNull") - { - /// Replace `isNull(nullable_argument)` with `nullable_argument.null` - column.name += ".null"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "isNotNull") - { - /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - column.name += ".null"; - column.type = std::make_shared(); - - function_arguments_nodes = {std::make_shared(column, column_source)}; - - resolveOrdinaryFunctionNodeByName(*function_node, "not", getContext()); - } - } - else if (column_type.isMap()) - { - if (function_name == "length") - { - /// Replace `length(map_argument)` with `map_argument.size0` - column.name += ".size0"; - column.type = std::make_shared(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapKeys") - { - /// Replace `mapKeys(map_argument)` with `map_argument.keys` - column.name += ".keys"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapValues") - { - /// Replace 
`mapValues(map_argument)` with `map_argument.values` - column.name += ".values"; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - } - } - else if (function_arguments_nodes.size() == 2) - { - const auto * second_argument_constant_node = function_arguments_nodes[1]->as(); - if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node) - { - /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` - * with `tuple_argument.column_name`. - */ - const auto & tuple_element_constant_value = second_argument_constant_node->getValue(); - const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); - - const auto & data_type_tuple = assert_cast(*column.type); - - String subcolumn_name; - - if (tuple_element_constant_value_type == Field::Types::String) - { - subcolumn_name = tuple_element_constant_value.get(); - } - else if (tuple_element_constant_value_type == Field::Types::UInt64) - { - auto tuple_column_index = tuple_element_constant_value.get(); - subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); - } - else - { - return; - } - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node) - { - /// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`. 
- const auto & variant_element_constant_value = second_argument_constant_node->getValue(); - String subcolumn_name; - - if (variant_element_constant_value.getType() != Field::Types::String) - return; - - subcolumn_name = variant_element_constant_value.get(); - - column.name += '.'; - column.name += subcolumn_name; - column.type = function_node->getResultType(); - - node = std::make_shared(column, column_source); - } - else if (function_name == "mapContains" && column_type.isMap()) - { - const auto & data_type_map = assert_cast(*column.type); - - /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - column.name += ".keys"; - column.type = std::make_shared(data_type_map.getKeyType()); - - auto has_function_argument = std::make_shared(column, column_source); - function_arguments_nodes[0] = std::move(has_function_argument); - - resolveOrdinaryFunctionNodeByName(*function_node, "has", getContext()); - } - } + auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) + transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); } }; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 90596ce1000..50f21842ac1 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -2,7 +2,23 @@ SELECT `m.size0` AS `length(m)` FROM t_func_to_subcolumns_map 2 1 +SELECT `m.size0` = 0 AS `empty(m)` +FROM t_func_to_subcolumns_map +0 +0 +SELECT `m.size0` != 0 AS `notEmpty(m)` +FROM t_func_to_subcolumns_map +1 +1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 1 +SELECT __table1.`m.size0` = 0 AS `empty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +0 +0 +SELECT 
__table1.`m.size0` != 0 AS `notEmpty(m)` +FROM default.t_func_to_subcolumns_map AS __table1 +1 +1 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index b5687696b43..c574e1033c0 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -10,10 +10,22 @@ SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; SELECT length(m) FROM t_func_to_subcolumns_map; +EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(m) FROM t_func_to_subcolumns_map; SELECT length(m) FROM t_func_to_subcolumns_map; +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT empty(m) FROM t_func_to_subcolumns_map; +SELECT empty(m) FROM t_func_to_subcolumns_map; + +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT notEmpty(m) FROM t_func_to_subcolumns_map; +SELECT notEmpty(m) FROM t_func_to_subcolumns_map; + DROP TABLE t_func_to_subcolumns_map; From 361b5a20771b17305b7d11e626c6f9eba9b77fe3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Feb 2024 18:19:15 +0000 Subject: [PATCH 020/273] more refactoring of FunctionToSubcolumnsPass --- .../Passes/FunctionToSubcolumnsPass.cpp | 86 ++++++++++--------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index f0392a0d9d4..954ae6df13e 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -26,31 +26,40 @@ namespace DB namespace { 
-void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) +struct ColumnContext +{ + NameAndTypePair column; + QueryTreeNodePtr column_source; + ContextPtr context; +}; + +using NodeToSubcolumnTransformer = std::function; + +void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `length(argument)` with `argument.size0` /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); } template -void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) +void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive /// `argument` may be Array or Map. - NameAndTypePair column{column_node.getColumnName() + ".size0", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); function_arguments_nodes.clear(); - function_arguments_nodes.push_back(std::make_shared(column, column_node.getColumnSource())); + function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); auto function_name = positive ? 
"equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(context)); + resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -73,7 +82,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) } template -void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) +void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`. /// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`. @@ -86,19 +95,16 @@ void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & funct if (!second_argument_constant_node) return; - auto column_type = column_node.getColumnType(); - const auto & data_type_concrete = assert_cast(*column_type); - + const auto & data_type_concrete = assert_cast(*ctx.column.type); auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete); + if (subcolumn_name.empty()) return; - NameAndTypePair column{column_node.getColumnName() + "." + subcolumn_name, function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + "." 
+ subcolumn_name, function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); } -using NodeToSubcolumnTransformer = std::function; - std::map, NodeToSubcolumnTransformer> node_transformers = { { @@ -121,52 +127,51 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Map, "mapKeys"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapKeys(map_argument)` with `map_argument.keys` - NameAndTypePair column{column_node.getColumnName() + ".keys", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapValues"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapValues(map_argument)` with `map_argument.values` - NameAndTypePair column{column_node.getColumnName() + ".values", function_node.getResultType()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapContains"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` - auto column_type = column_node.getColumnType(); - const auto & data_type_map = assert_cast(*column_type); + const auto & data_type_map = assert_cast(*ctx.column.type); - NameAndTypePair column{column_node.getColumnName() + 
".keys", std::make_shared(data_type_map.getKeyType())}; + NameAndTypePair column{ctx.column.name + ".keys", std::make_shared(data_type_map.getKeyType())}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto has_function_argument = std::make_shared(column, column_node.getColumnSource()); + auto has_function_argument = std::make_shared(column, ctx.column_source); function_arguments_nodes[0] = std::move(has_function_argument); - resolveOrdinaryFunctionNodeByName(function_node, "has", context); + resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context); }, }, { {TypeIndex::Nullable, "count"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - auto new_column_node = std::make_shared(column, column_node.getColumnSource()); + auto new_column_node = std::make_shared(column, ctx.column_source); auto function_node_not = std::make_shared("not"); function_node_not->getArguments().getNodes().push_back(std::move(new_column_node)); - resolveOrdinaryFunctionNodeByName(*function_node_not, "not", context); + resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context); function_arguments_nodes = {std::move(function_node_not)}; resolveAggregateFunctionNodeByName(function_node, "sum"); @@ -174,23 +179,23 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Nullable, "isNull"}, - [](QueryTreeNodePtr & node, FunctionNode &, ColumnNode & column_node, ContextPtr) + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `isNull(nullable_argument)` with 
`nullable_argument.null` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; - node = std::make_shared(column, column_node.getColumnSource()); + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; + node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Nullable, "isNotNull"}, - [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnNode & column_node, ContextPtr context) + [](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` - NameAndTypePair column{column_node.getColumnName() + ".null", std::make_shared()}; + NameAndTypePair column{ctx.column.name + ".null", std::make_shared()}; auto & function_arguments_nodes = function_node.getArguments().getNodes(); - function_arguments_nodes = {std::make_shared(column, column_node.getColumnSource())}; - resolveOrdinaryFunctionNodeByName(function_node, "not", context); + function_arguments_nodes = {std::make_shared(column, ctx.column_source)}; + resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context); }, }, { @@ -380,7 +385,10 @@ public: auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); if (transformer_it != node_transformers.end()) - transformer_it->second(node, *function_node, *first_argument_column_node, getContext()); + { + ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()}; + transformer_it->second(node, *function_node, ctx); + } } }; From 1da258bfda0fcae33451efd670e8486910052c40 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:35:03 +0000 Subject: [PATCH 021/273] better functions to subcolumns optimization --- .../Passes/FunctionToSubcolumnsPass.cpp | 96 ++++++----- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- ...functions_to_subcolumns_analyzer.reference | 149 +++++++++++++++++- 
...01872_functions_to_subcolumns_analyzer.sql | 12 +- ...03_functions_to_subcolumns_final.reference | 25 +++ .../03003_functions_to_subcolumns_final.sql | 23 +++ 6 files changed, 242 insertions(+), 65 deletions(-) create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference create mode 100644 tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 4ebcd59d8ec..8ba33a50ccf 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -58,8 +58,8 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col function_arguments_nodes.push_back(std::make_shared(column, ctx.column_source)); function_arguments_nodes.push_back(std::make_shared(static_cast(0))); - auto function_name = positive ? "equals" : "notEquals"; - resolveOrdinaryFunctionNodeByName(function_node, function_name, std::move(ctx.context)); + const auto * function_name = positive ? "equals" : "notEquals"; + resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context); } String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) @@ -246,24 +246,11 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - struct Data - { - bool has_final = false; - std::unordered_set all_key_columns; - std::unordered_map indentifiers_count; - std::unordered_map optimized_identifiers_count; - }; - - Data getData() const { return data; } - void enterImpl(const QueryTreeNodePtr & node) { if (!getSettings().optimize_functions_to_subcolumns) return; - if (data.has_final) - return; - if (auto * table_node = node->as()) { enterImpl(*table_node); @@ -284,18 +271,45 @@ public: } } + std::unordered_set getIdentifiersToOptimize() const + { + /// Do not optimize if full column is requested in other context. 
+ /// It doesn't make sense because it doesn't reduce amount of read data + /// and optimized functions are not computation heavy. But introducing + /// new identifier complicates query analysis and may break it. + /// + /// E.g. query: + /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) + /// may be optimized to incorrect query: + /// SELECT n FROM table GROUP BY n HAVING not(n.null) + /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) + /// + /// Do not optimize index columns (primary, min-max, secondary), + /// because otherwise analysis of indexes may be broken. + /// TODO: handle subcolumns in index analysis. + + std::unordered_set identifiers_to_optimize; + for (const auto & [identifier, count] : optimized_identifiers_count) + { + if (all_key_columns.contains(identifier)) + continue; + + auto it = identifiers_count.find(identifier); + if (it != identifiers_count.end() && it->second == count) + identifiers_to_optimize.insert(identifier); + } + + return identifiers_to_optimize; + } + private: - Data data; + std::unordered_set all_key_columns; + std::unordered_map identifiers_count; + std::unordered_map optimized_identifiers_count; NameSet processed_tables; void enterImpl(const TableNode & table_node) { - if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) - { - data.has_final = true; - return; - } - auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); if (processed_tables.emplace(table_name).second) return; @@ -305,7 +319,7 @@ private: for (const auto & column_name : key_columns) { Identifier identifier({table_name, column_name}); - data.all_key_columns.insert(identifier); + all_key_columns.insert(identifier); } }; @@ -337,18 +351,23 @@ private: auto table_name = table_node->getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column_node.getColumnName()}); - ++data.indentifiers_count[qualified_name]; + 
++identifiers_count[qualified_name]; } void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node) { + /// For queries with FINAL converting function to subcolumn may alter + /// special merging algorithms and produce wrong result of query. + if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) + return; + auto column = first_argument_column_node.getColumn(); auto table_name = table_node.getStorage()->getStorageID().getFullTableName(); Identifier qualified_name({table_name, column.name}); if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()})) - ++data.optimized_identifiers_count[qualified_name]; + ++optimized_identifiers_count[qualified_name]; } }; @@ -398,32 +417,7 @@ void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPt { FunctionToSubcolumnsVisitorFirstPass first_visitor(context); first_visitor.visit(query_tree_node); - auto data = first_visitor.getData(); - - /// For queries with FINAL converting function to subcolumn may alter - /// special merging algorithms and produce wrong result of query. - if (data.has_final) - return; - - /// Do not optimize if full column is requested in other context. - /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - std::unordered_set identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!data.all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); + auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize(); if (identifiers_to_optimize.empty()) return; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a8041b5b0a9..c73ff524d1f 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -88,7 +88,7 @@ private: static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); - auto suffix = result_type->isNullable() ? "OrNullIf" : "If"; + const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); } }; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index ce5e46fa271..e409e9ad89f 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -1,5 +1,24 @@ 0 0 1 0 1 0 +QUERY id: 0 + PROJECTION COLUMNS + isNull(id) UInt8 + isNull(n) UInt8 + isNotNull(n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 + FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, 
column_name: n.null, result_type: UInt8, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.id IS NULL AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, @@ -7,6 +26,32 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 3 0 1 0 0 1 0 \N +QUERY id: 0 + PROJECTION COLUMNS + length(arr) UInt64 + empty(arr) UInt8 + notEmpty(arr) UInt8 + empty(n) Nullable(UInt8) + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: arr.size0, result_type: UInt64, source_id: 3 + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 8, function_name: notEquals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: arr.size0, result_type: UInt64, source_id: 3 + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 12, function_name: empty, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: n, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`arr.size0` = 0 AS `empty(arr)`, @@ -15,19 +60,106 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ['foo','bar'] [1,2] [] [] +QUERY id: 0 + PROJECTION COLUMNS + mapKeys(m) Array(String) + mapValues(m) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: m.keys, result_type: Array(String), source_id: 3 + COLUMN id: 4, column_name: m.values, result_type: Array(UInt64), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT __table1.`m.keys` AS `mapKeys(m)`, __table1.`m.values` AS 
`mapValues(m)` FROM default.t_func_to_subcolumns AS __table1 1 +QUERY id: 0 + PROJECTION COLUMNS + count(n) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: not, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT sum(NOT __table1.`n.null`) AS `count(n)` FROM default.t_func_to_subcolumns AS __table1 2 +QUERY id: 0 + PROJECTION COLUMNS + count(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + SELECT count(__table1.id) AS `count(id)` FROM default.t_func_to_subcolumns AS __table1 1 0 0 2 1 0 3 0 0 +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + isNull(n) UInt8 + isNull(right.n) UInt8 + PROJECTION + LIST id: 1, nodes: 3 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 + FUNCTION id: 5, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + JOIN TREE + JOIN id: 9, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns + RIGHT TABLE EXPRESSION + UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + QUERIES + LIST id: 10, nodes: 2 + QUERY id: 11, alias: __table3 + PROJECTION COLUMNS + id UInt8 + n String + PROJECTION + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: UInt64_1, 
constant_value_type: UInt8 + CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + JOIN TREE + TABLE id: 15, alias: __table4, table_name: system.one + QUERY id: 16, alias: __table5 + PROJECTION COLUMNS + id UInt8 + \'www\' String + PROJECTION + LIST id: 17, nodes: 2 + CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: \'www\', constant_value_type: String + JOIN TREE + TABLE id: 20, alias: __table6, table_name: system.one + JOIN EXPRESSION + LIST id: 21, nodes: 1 + COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + EXPRESSION + LIST id: 23, nodes: 2 + COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 25, column_name: id, result_type: UInt8, source_id: 8 + SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, @@ -35,16 +167,19 @@ SELECT FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( - + ( SELECT - 1 AS id, - \'qqq\' AS n - FROM system.one AS __table4 + 1 AS id, + \'qqq\' AS n + FROM system.one AS __table4 + ) UNION ALL + ( SELECT - 3 AS id, - \'www\' AS `\'www\'` - FROM system.one AS __table6 + 3 AS id, + \'www\' AS `\'www\'` + FROM system.one AS __table6 + ) ) AS __table2 USING (id) 0 10 0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql index c1ab6909e2f..b544f6829cf 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.sql @@ -9,24 +9,24 @@ ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 
SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(n) FROM t_func_to_subcolumns; SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT count(id) FROM t_func_to_subcolumns; SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); -EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left +EXPLAIN QUERY TREE dump_tree = 1, dump_ast = 1 SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); DROP TABLE t_func_to_subcolumns; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference new file mode 100644 index 00000000000..3051c199363 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.reference @@ -0,0 
+1,25 @@ +3 +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 +) +2 +SELECT __table1.`arr.size0` AS `length(arr)` +FROM default.t_length_1 AS __table1 +WHERE __table1.`arr.size0` IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) +2 +SELECT length(__table1.arr) AS `length(arr)` +FROM default.t_length_1 AS __table1 +FINAL +WHERE length(__table1.arr) IN ( + SELECT __table1.arr_length AS arr_length + FROM default.t_length_2 AS __table1 + FINAL +) diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql new file mode 100644 index 00000000000..5975347ad09 --- /dev/null +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t_length_1; +DROP TABLE IF EXISTS t_length_2; + +SET allow_experimental_analyzer = 1; +SET optimize_on_insert = 0; + +CREATE TABLE t_length_1 (id UInt64, arr Array(UInt64)) ENGINE = ReplacingMergeTree ORDER BY id; +CREATE TABLE t_length_2 (id UInt64, arr_length UInt64) ENGINE = ReplacingMergeTree ORDER BY id; + +INSERT INTO t_length_1 VALUES (1, [1, 2, 3]), (2, [4, 5]); +INSERT INTO t_length_2 VALUES (1, 3), (1, 2), (2, 2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2); + +SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); +EXPLAIN 
QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr) FROM t_length_1 FINAL WHERE length(arr) in (SELECT arr_length FROM t_length_2 FINAL); + +DROP TABLE t_length_1; +DROP TABLE t_length_2; From c0dd9b13aa09085a03b0e54b67ed4ff5c46f4336 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 15:39:39 +0000 Subject: [PATCH 022/273] update docs --- docs/en/operations/settings/settings.md | 8 ++++---- docs/ru/operations/settings/settings.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 622644a1543..2273aa8c472 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1776,7 +1776,7 @@ Default value: 0 (no restriction). ## insert_quorum {#insert_quorum} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables the quorum writes. @@ -1819,7 +1819,7 @@ See also: ## insert_quorum_parallel {#insert_quorum_parallel} :::note -This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. +This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected. 
@@ -1840,7 +1840,7 @@ See also: ## select_sequential_consistency {#select_sequential_consistency} :::note -This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. +This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. ::: Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default). @@ -2504,7 +2504,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. -Default value: `0`. +Default value: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a56afda641b..f4eecc615b2 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2077,7 +2077,7 @@ SELECT * FROM test_table - 0 — оптимизация отключена. - 1 — оптимизация включена. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. ## optimize_trivial_count_query {#optimize-trivial-count-query} @@ -2798,7 +2798,7 @@ SELECT TOP 3 name, value FROM system.settings; ``` ### output_format_pretty_color {#output_format_pretty_color} -Включает/выключает управляющие последовательности ANSI в форматах Pretty. +Включает/выключает управляющие последовательности ANSI в форматах Pretty. 
Возможные значения: @@ -4123,7 +4123,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан часовой пояс, будут интерпретированы как относящиеся к указанной зоне. -При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. +При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Функции `timeZone()` and `serverTimezone()` возвращают часовой пояс текущей сессии и сервера соответственно. From ed3c36debefe6c8a59f8482c75ef9f70c27c9bc3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 4 Mar 2024 19:59:22 +0000 Subject: [PATCH 023/273] fix tests --- src/Core/SettingsChangesHistory.h | 3 +++ .../0_stateless/02116_tuple_element_analyzer.reference | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index face1def4b4..afb9b201f50 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", { + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, + }}, {"24.2", { {"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference 
b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference index d30f3a6cc58..22d48ffb2f3 100644 --- a/tests/queries/0_stateless/02116_tuple_element_analyzer.reference +++ b/tests/queries/0_stateless/02116_tuple_element_analyzer.reference @@ -15,8 +15,8 @@ SELECT __table1.`t2.1` AS `tupleElement(t2, 1)` FROM default.t_tuple_element AS __table1 1 2 SELECT - 1 AS `tupleElement(t, 1)`, - 2 AS `tupleElement(t, 2)` + _CAST(1, \'UInt8\') AS `tupleElement(t, 1)`, + _CAST(2, \'UInt8\') AS `tupleElement(t, 2)` FROM system.one AS __table1 1 2 SELECT From 7ac0ebbaca2c9032b04a6e77456e1c5b7f325f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 16:11:12 +0100 Subject: [PATCH 024/273] Test jeaiii itoa --- base/base/itoa.cpp | 522 +++++++++++++++++++++------------------------ 1 file changed, 241 insertions(+), 281 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index fd8fd8de025..4587d3e3e82 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -1,296 +1,256 @@ -// Based on https://github.com/amdn/itoa and combined with our optimizations -// -//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-// -// -// The MIT License (MIT) -// Copyright (c) 2016 Arturo Martin-de-Nicolas -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -//===----------------------------------------------------------------------===// - -#include -#include -#include #include #include #include #include -namespace +namespace jeaiii { -template -ALWAYS_INLINE inline constexpr T pow10(size_t x) -{ - return x ? 10 * pow10(x - 1) : 1; -} +/* + MIT License -// Division by a power of 10 is implemented using a multiplicative inverse. -// This strength reduction is also done by optimizing compilers, but -// presently the fastest results are produced by using the values -// for the multiplication and the shift as given by the algorithm -// described by Agner Fog in "Optimizing Subroutines in Assembly Language" -// -// http://www.agner.org/optimize/optimizing_assembly.pdf -// -// "Integer division by a constant (all processors) -// A floating point number can be divided by a constant by multiplying -// with the reciprocal. If we want to do the same with integers, we have -// to scale the reciprocal by 2n and then shift the product to the right -// by n. There are various algorithms for finding a suitable value of n -// and compensating for rounding errors. The algorithm described below -// was invented by Terje Mathisen, Norway, and not published elsewhere." + Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa -/// Division by constant is performed by: -/// 1. Adding 1 if needed; -/// 2. Multiplying by another constant; -/// 3. Shifting right by another constant. 
-template -struct Division -{ - static constexpr bool add{add_}; - static constexpr UInt multiplier{multiplier_}; - static constexpr unsigned shift{shift_}; -}; + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: -/// Select a type with appropriate number of bytes from the list of types. -/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. -/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. -template -struct SelectType -{ - using Result = typename SelectType::Result; -}; + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. -template -struct SelectType<1, T, Ts...> -{ - using Result = T; -}; + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + using u32 = decltype(0xffffffff); + using u64 = decltype(0xffffffffffffffff); - -/// Division by 10^N where N is the size of the type. 
-template -using DivisionBy10PowN = typename SelectType< - N, - Division, /// divide by 10 - Division, /// divide by 100 - Division, /// divide by 10000 - Division /// divide by 100000000 - >::Result; - -template -using UnsignedOfSize = typename SelectType::Result; - -/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in -template -struct QuotientAndRemainder -{ - UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits - UnsignedOfSize remainder; // remainder with at most N decimal digits -}; - -template -QuotientAndRemainder inline split(UnsignedOfSize value) -{ - constexpr DivisionBy10PowN division; - - UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); - - return {quotient, remainder}; -} - -ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) -{ - *p = '0' + value; - ++p; - return p; -} - -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ - -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - -namespace convert -{ -template -char * head(char * p, UInt u); -template -char * tail(char * p, UInt u); - -//===----------------------------------------------------------===// -// head: find most significant digit, skip leading zeros -//===----------------------------------------------------------===// - -// "x" contains quotient and remainder after 
division by 10^N -// quotient is less than 10^N -template -ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder x) -{ - p = head(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * head(char * p, UInt u) -{ - return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * head, 1>(char * p, UnsignedOfSize<1> u) -{ - return u < 10 ? outDigit(p, u) : outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// tail: produce all digits including leading zeros -//===----------------------------------------------------------===// - -// recursive step, "u" is less than 10^2*N -template -ALWAYS_INLINE inline char * tail(char * p, UInt u) -{ - QuotientAndRemainder x = split(u); - p = tail(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; -} - -// recursion base case, selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * tail, 1>(char * p, UnsignedOfSize<1> u) -{ - return outTwoDigits(p, u); -} - -//===----------------------------------------------------------===// -// large values are >= 10^2*N -// where x contains quotient and remainder after division by 10^N -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder x) -{ - QuotientAndRemainder y = split(x.quotient); - p = head(p, UnsignedOfSize(y.quotient)); - p = tail(p, y.remainder); - p = tail(p, x.remainder); - return p; -} - -//===----------------------------------------------------------===// -// handle values of "u" that might be >= 10^2*N -// where N is the size of "u" in bytes -//===----------------------------------------------------------===// -template -ALWAYS_INLINE inline char * uitoa(char * p, UInt u) -{ - if (u < pow10>(N)) - return 
head(p, UnsignedOfSize(u)); - QuotientAndRemainder x = split(u); - - return u < pow10>(2 * N) ? head(p, x) : large(p, x); -} - -// selected when "u" is one byte -template <> -ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) -{ - if (u < 10) - return outDigit(p, u); - else if (u < 100) - return outTwoDigits(p, u); - else + struct pair { - p = outDigit(p, u / 100); - p = outTwoDigits(p, u % 100); - return p; + char dd[2]; + constexpr pair(char c) : dd{ c, '\0' } { } + constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } + }; + + constexpr struct + { + pair dd[100] + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + pair fd[100] + { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + }; + } + digits; + + constexpr u64 mask24 = (u64(1) << 24) - 1; + constexpr u64 mask32 = (u64(1) << 32) - 1; + constexpr u64 mask57 = (u64(1) << 57) - 1; + + template struct _cond { using type = F; }; + template struct _cond { using type = T; }; + template using cond = typename _cond::type; + + template + inline ALWAYS_INLINE + char* to_text_from_integer(char* b, T i) + { + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be 
bool + U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); + + if (n < u32(1e2)) + { + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; + } + if (n < u32(1e6)) + { + if (n < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; + } + auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < u64(1ull << 32ull)) + { + if (n < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; + } + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings + u32 z = n % u32(1e8); + u64 u = n / u32(1e8); + + if (u < u32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else 
if (u < u32(1e6)) + { + if (u < u32(1e4)) + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + else + { + auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < u32(1e8)) + { + auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < u32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < u64(1ull << 32ull)) + { + auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < u32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + u32 y = u % u32(1e8); + u /= u32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < u32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < u32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; 
+ *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + // do 8 digits + auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } } -//===----------------------------------------------------------===// -// handle unsigned and signed integral operands -//===----------------------------------------------------------===// - -// itoa: handle unsigned integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(U u, char * p) +namespace { - return convert::uitoa(p, u); -} - -// itoa: handle signed integral operands (selected by SFINAE) -template && std::is_integral_v> * = nullptr> -ALWAYS_INLINE inline char * itoa(I i, char * p) +ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - // Need "mask" to be filled with a copy of the sign bit. - // If "i" is a negative value, then the result of "operator >>" - // is implementation-defined, though usually it is an arithmetic - // right shift that replicates the sign bit. - // Use a conditional expression to be portable, - // a good optimizing compiler generates an arithmetic right shift - // and avoids the conditional branch. - UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; - // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. - // Cannot use std::abs() because the result is undefined - // in 2's complement systems for the most-negative value. 
- // Want to avoid conditional branch for performance reasons since - // CPU branch prediction will be ineffective when negative values - // occur randomly. - // Let "u" be "i" cast to unsigned type UnsignedOfSize. - // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. - // This yields the absolute value with the desired type without - // using a conditional branch and without invoking undefined or - // implementation defined behavior: - UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); - // Unconditionally store a minus sign when producing digits - // in a forward direction and increment the pointer only if - // the value is in fact negative. - // This avoids a conditional branch and is safe because we will - // always produce at least one digit and it will overwrite the - // minus sign when the value is not negative. - *p = '-'; - p += (mask & 1); - p = convert::uitoa(p, u); - return p; -} + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; @@ -301,7 +261,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) { /// If we the highest 64bit item is empty, we can print just the lowest item as u64 if (_x.items[UInt128::_impl::little(1)] == 0) - return convert::itoa(_x.items[UInt128::_impl::little(0)], p); + return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]); /// Doing operations using __int128 is faster and we already rely on this feature using T = unsigned __int128; @@ -332,7 +292,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) current_block += max_multiple_of_hundred_blocks; } - char * highest_part_print = convert::itoa(uint64_t(x), p); + char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x)); for (int i = 0; i < current_block; i++) { outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); @@ -448,12 +408,12 @@ ALWAYS_INLINE inline char * 
writeSIntText(T x, char * pos) char * itoa(UInt8 i, char * p) { - return convert::itoa(uint8_t(i), p); + return jeaiii::to_text_from_integer(p, uint8_t(i)); } char * itoa(Int8 i, char * p) { - return convert::itoa(int8_t(i), p); + return jeaiii::to_text_from_integer(p, int8_t(i)); } char * itoa(UInt128 i, char * p) @@ -479,7 +439,7 @@ char * itoa(Int256 i, char * p) #define DEFAULT_ITOA(T) \ char * itoa(T i, char * p) \ { \ - return convert::itoa(i, p); \ + return jeaiii::to_text_from_integer(p, i); \ } #define FOR_MISSING_INTEGER_TYPES(M) \ From 225db5e253f578483bcbeeb8f3063c241382d49e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:04:52 +0100 Subject: [PATCH 025/273] Style --- base/base/itoa.cpp | 386 +++++++++++++++++++++++---------------------- 1 file changed, 194 insertions(+), 192 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 4587d3e3e82..868fdedb176 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -28,229 +28,231 @@ namespace jeaiii OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - using u32 = decltype(0xffffffff); - using u64 = decltype(0xffffffffffffffff); +struct pair +{ + char dd[2]; + constexpr pair(char c) : dd{c, '\0'} { } + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } +}; - struct pair - { - char dd[2]; - constexpr pair(char c) : dd{ c, '\0' } { } - constexpr pair(int n) : dd{ "0123456789"[n / 10], "0123456789"[n % 10] } { } +constexpr struct +{ + pair dd[100]{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // }; + pair fd[100]{ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, // + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, // + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, // + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, // + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, // + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, // + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, // + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, // + }; +} digits; - constexpr struct +constexpr UInt64 mask24 = (UInt64(1) << 24) - 1; +constexpr UInt64 mask32 = (UInt64(1) << 32) - 1; +constexpr UInt64 mask57 = (UInt64(1) << 57) - 1; + +template +struct _cond +{ + using type = F; +}; +template +struct _cond +{ + using type = T; +}; +template +using cond = typename _cond::type; + +template +inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) +{ + constexpr auto q = sizeof(T); + using U = cond>>; + + // convert bool to int before test with unary + to silence warning if T happens to be bool + U const n = +i < 0 ? 
*b++ = '-', U(0) - U(i) : U(i); + + if (n < UInt32(1e2)) { - pair dd[100] - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; - pair fd[100] - { - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - }; + *reinterpret_cast(b) = digits.fd[n]; + return n < 10 ? b + 1 : b + 2; } - digits; - - constexpr u64 mask24 = (u64(1) << 24) - 1; - constexpr u64 mask32 = (u64(1) << 32) - 1; - constexpr u64 mask57 = (u64(1) << 57) - 1; - - template struct _cond { using type = F; }; - template struct _cond { using type = T; }; - template using cond = typename _cond::type; - - template - inline ALWAYS_INLINE - char* to_text_from_integer(char* b, T i) + if (n < UInt32(1e6)) { - constexpr auto q = sizeof(T); - using U = cond>>; - - // convert bool to int before test with unary + to silence warning if T happens to be bool - U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); - - if (n < u32(1e2)) + if (n < UInt32(1e4)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? 
b + 1 : b + 2; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= n < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + return b + 4; } - if (n < u32(1e6)) + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e5); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + return b + 6; + } + if (n < UInt64(1ull << 32ull)) + { + if (n < UInt32(1e8)) { - if (n < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - return b + 4; - } - auto f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e5); + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= n < UInt32(1e7); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - return b + 6; - } - if (n < u64(1ull << 32ull)) - { - if (n < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; - } - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= n < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = 
digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - return b + 10; - } - - // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings - u32 z = n % u32(1e8); - u64 u = n / u32(1e8); - - if (u < u32(1e2)) - { - // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else if (u < u32(1e6)) - { - if (u < u32(1e4)) - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - else - { - auto f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e5); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - b += 6; - } - } - else if (u < u32(1e8)) - { - auto f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; - *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= u < u32(1e7); - auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; - auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; } - else if (u < u64(1ull << 32ull)) + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= n < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & 
mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + return b + 10; + } + + // if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings + UInt32 z = n % UInt32(1e8); + UInt64 u = n / UInt32(1e8); + + if (u < UInt32(1e2)) + { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else if (u < UInt32(1e6)) + { + if (u < UInt32(1e4)) { - auto f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 57]; - b -= u < u32(1e9); - auto f2 = (f0 & mask57) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; - auto f4 = (f2 & mask57) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; - auto f6 = (f4 & mask57) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; - auto f8 = (f6 & mask57) * 100; - *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; - b += 10; + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } else { - u32 y = u % u32(1e8); - u /= u32(1e8); - - // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) - if (u < u32(1e2)) - { - *reinterpret_cast(b) = digits.dd[u]; - b += 2; - } - else - { - auto f0 = u32(10 * (1 << 24) / 1e3 + 1) * u; - *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= u < u32(1e3); - auto f2 = (f0 & mask24) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; - b += 4; - } - // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u 
< UInt32(1e5); auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; - auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - b += 8; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + b += 6; + } + } + else if (u < UInt32(1e8)) + { + auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + *reinterpret_cast(b) = digits.fd[f0 >> 32]; + b -= u < UInt32(1e7); + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; + } + else if (u < UInt64(1ull << 32ull)) + { + auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 57]; + b -= u < UInt32(1e9); + auto f2 = (f0 & mask57) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 57]; + auto f4 = (f2 & mask57) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 57]; + auto f6 = (f4 & mask57) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 57]; + auto f8 = (f6 & mask57) * 100; + *reinterpret_cast(b + 8) = digits.dd[f8 >> 57]; + b += 10; + } + else + { + UInt32 y = u % UInt32(1e8); + u /= UInt32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < UInt32(1e2)) + { + *reinterpret_cast(b) = digits.dd[u]; + b += 2; + } + else + { + auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u; + *reinterpret_cast(b) = digits.fd[f0 >> 24]; + b -= u < UInt32(1e3); + auto f2 = (f0 & mask24) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; + b += 4; } // do 8 digits - auto f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; - *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 
32]; auto f2 = (f0 & mask32) * 100; - *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; - *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; auto f6 = (f4 & mask32) * 100; - *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; - return b + 8; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + b += 8; } + // do 8 digits + auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + *reinterpret_cast(b) = digits.dd[f0 >> 32]; + auto f2 = (f0 & mask32) * 100; + *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; + auto f4 = (f2 & mask32) * 100; + *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; + auto f6 = (f4 & mask32) * 100; + *reinterpret_cast(b + 6) = digits.dd[f6 >> 32]; + return b + 8; +} } namespace { ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + *reinterpret_cast(p) = jeaiii::digits.fd[value]; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From de76be248b4f2b667fcd390874af5b296e91686f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 20 Mar 2024 17:56:01 +0100 Subject: [PATCH 026/273] Revert incorrect change on my part --- base/base/itoa.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 868fdedb176..e7250764704 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -250,9 +250,24 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) namespace { -ALWAYS_INLINE inline void outTwoDigits(char * p, uint8_t value) +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + 
"10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) { - *reinterpret_cast(p) = jeaiii::digits.fd[value]; + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; } const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; From c4fcc5946831da07bb148a1299bd2c7099c221a7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 Mar 2024 16:59:30 +0000 Subject: [PATCH 027/273] fix test --- tests/queries/0_stateless/02116_tuple_element.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index ece7114e763..e3a5134f2b2 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -19,7 +19,7 @@ SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } SELECT t2.1 FROM t_tuple_element; EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; @@ -31,7 +31,7 @@ SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMEN SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError 
NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } DROP TABLE t_tuple_element; From 891277e01e99f21f4087c947a4fb3d448473562e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 25 Mar 2024 14:57:43 +0000 Subject: [PATCH 028/273] fix clang-tidy --- src/Storages/IStorageCluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 36ded8d5412..d219eb32f45 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -37,7 +37,7 @@ public: QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - bool isRemote() const override final { return true; } + bool isRemote() const final { return true; } bool supportsSubcolumns() const override { return true; } bool supportsOptimizationToSubcolumns() const override { return false; } From aa36b039c1fcec8881faee73083beb007d7b83a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 26 Mar 2024 15:02:25 +0000 Subject: [PATCH 029/273] fix test --- .../02971_functions_to_subcolumns_column_names.reference | 8 ++------ .../02971_functions_to_subcolumns_column_names.sql | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 4787c660c68..03c16267db1 100644 --- 
a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -2,13 +2,9 @@ SELECT `arr.size0` AS `length(arr)`, `n.null` AS `isNull(n)` FROM t_column_names -┌─length(arr)─┬─isNull(n)─┐ -│ 3 │ 0 │ -└─────────────┴───────────┘ +{"length(arr)":"3","isNull(n)":0} SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` FROM default.t_column_names AS __table1 -┌─length(arr)─┬─isNull(n)─┐ -│ 3 │ 0 │ -└─────────────┴───────────┘ +{"length(arr)":"3","isNull(n)":0} diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index 89c39046df3..b867148c8ca 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -8,12 +8,12 @@ SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 0; EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; EXPLAIN QUERY TREE dump_tree = 0, dump_ast = 1 SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT PrettyCompactNoEscapes; +SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; DROP TABLE t_column_names; From 1f85889f889bfa35c8c7bc27fade1762b20906dd Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 29 Mar 2024 18:53:03 -0400 Subject: [PATCH 030/273] FuzzQuery table function --- src/Client/ClientBase.h | 2 +- src/{Client => Common}/QueryFuzzer.cpp | 50 +++-- src/{Client => Common}/QueryFuzzer.h | 1 + src/Storages/StorageFuzzQuery.cpp | 174 ++++++++++++++++++ 
src/Storages/StorageFuzzQuery.h | 89 +++++++++ src/Storages/registerStorages.cpp | 2 + src/TableFunctions/TableFunctionFuzzQuery.cpp | 54 ++++++ src/TableFunctions/TableFunctionFuzzQuery.h | 42 +++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + .../03031_table_function_fuzzquery.reference | 11 ++ .../03031_table_function_fuzzquery.sql | 18 ++ 12 files changed, 426 insertions(+), 19 deletions(-) rename src/{Client => Common}/QueryFuzzer.cpp (97%) rename src/{Client => Common}/QueryFuzzer.h (99%) create mode 100644 src/Storages/StorageFuzzQuery.cpp create mode 100644 src/Storages/StorageFuzzQuery.h create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.cpp create mode 100644 src/TableFunctions/TableFunctionFuzzQuery.h create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.reference create mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.sql diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index 7be01686258..137d545f82f 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + 
FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_output) + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_output) + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_output) + std::cerr << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr 
<< "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_output) + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1360,11 +1370,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (debug_output) + { + std::cout << std::endl; + + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + std::cout << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 99% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..8a83934b620 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,6 +38,7 @@ struct ASTWindowDefinition; struct QueryFuzzer { pcg64 fuzz_rand{randomSeed()}; + bool debug_output = true; // We add elements to expression lists with fixed probability. 
Some elements // are so large, that the expected number of elements we add to them is diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..c29986c7a7a --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + for (size_t row_num = 0; row_num < block_size; ++row_num) + { + ASTPtr new_query = query->clone(); + fuzzer.fuzzMain(new_query); + + WriteBufferFromOwnString out; + formatAST(*new_query, out, false); + auto data = out.str(); + size_t data_len = data.size(); + + IColumn::Offset next_offset = offset + data_len + 1; + data_to.resize(next_offset); + + std::copy(data.begin(), data.end(), &data_to[offset]); + + data_to[offset + data_len] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum 
/*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, 0); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; + +void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) +{ + validateNamedCollection( + collection, + std::unordered_set(), + std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); + + if (collection.has("query")) + configuration.query = collection.get("query"); + + if (collection.has("random_seed")) + configuration.random_seed = collection.get("random_seed"); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + { + StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + } + else + { + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || 
engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + } + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..47142a81f16 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query = ""; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & 
table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + { + fuzzer.fuzz_rand = config_.random_seed; + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..f66d3ec3bfc 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -25,6 +25,7 @@ void registerStorageLiveView(StorageFactory & factory); void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void 
registerStorageWindowView(StorageFactory & factory); +void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -126,6 +127,7 @@ void registerStorages() registerStorageGenerateRandom(factory); registerStorageExecutable(factory); registerStorageWindowView(factory); + registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp new file mode 100644 index 00000000000..224f6666556 --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); + + auto args = args_func.at(0)->children; + configuration = StorageFuzzQuery::getConfiguration(args, context); +} + +StoragePtr TableFunctionFuzzQuery::executeImpl( + const ASTPtr & /*ast_function*/, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool is_insert_query) const +{ + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), + columns, + /* comment */ String{}, + configuration); + res->startup(); + return res; +} + +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description = "Perturbs a query string with random variations.", + .returned_value = "A table object with a single column 
containing perturbed query strings."}, + .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h new file mode 100644 index 00000000000..22d10341c4d --- /dev/null +++ b/src/TableFunctions/TableFunctionFuzzQuery.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class TableFunctionFuzzQuery : public ITableFunction +{ +public: + static constexpr auto name = "fuzzQuery"; + std::string getName() const override { return name; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override + { + return ColumnsDescription{{"query", std::make_shared()}}; + } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "fuzzQuery"; } + + String source; + std::optional random_seed; + StorageFuzzQuery::Configuration configuration; +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 927457ff9f6..2952d0b7b70 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -25,6 +25,7 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); + registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 296af146faf..eef262490bf 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ 
b/src/TableFunctions/registerTableFunctions.h @@ -22,6 +22,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); +void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..d598037127f --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,11 @@ +SELECT 1 +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n 
CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC +SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n 
count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..5f5bb4b23e4 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 8956) LIMIT 10; From 77e28e29e9b2550204ea8d60647115587f36673e Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 30 Mar 2024 14:40:18 -0400 Subject: [PATCH 031/273] fix style --- src/Storages/StorageFuzzQuery.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index c29986c7a7a..56b6a4de2a6 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -19,8 +19,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; - extern const int INCORRECT_DATA; } ColumnPtr FuzzQuerySource::createColumn() From 
6e611f7e81fe9977dd88f81cc4c24f3e1620ae8d Mon Sep 17 00:00:00 2001 From: pufit Date: Mon, 8 Apr 2024 16:18:29 +0200 Subject: [PATCH 032/273] fix review --- .../table-functions/fuzzQuery.md | 35 +++++++++ src/Common/QueryFuzzer.cpp | 40 +++++----- src/Common/QueryFuzzer.h | 37 ++++++--- src/Storages/StorageFuzzQuery.cpp | 75 +++++++++---------- src/Storages/StorageFuzzQuery.h | 4 +- .../03031_table_function_fuzzquery.reference | 18 ++--- 6 files changed, 124 insertions(+), 85 deletions(-) create mode 100644 docs/en/sql-reference/table-functions/fuzzQuery.md diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..ff8cfd1cd3b --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,35 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, random_seed]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. 
│ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/src/Common/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp index 137d545f82f..6dd51033e3c 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -164,8 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,14 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -200,8 +200,8 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_output) - std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -211,15 +211,15 @@ Field QueryFuzzer::fuzzField(Field field) size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_output) - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_output) - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -352,8 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -387,8 
+387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - if (debug_output) - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1370,14 +1370,14 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - if (debug_output) + if (out_stream) { - std::cout << std::endl; + *out_stream << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); + WriteBufferFromOStream ast_buf(*out_stream, 4096); formatAST(*ast, ast_buf, false /*highlight*/); ast_buf.finalize(); - std::cout << std::endl << std::endl; + *out_stream << std::endl << std::endl; } } diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 8a83934b620..3cf0381e044 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,10 +35,32 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; - bool debug_output = true; +public: + + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. 
+ void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -67,10 +89,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -78,9 +96,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -103,8 +118,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 56b6a4de2a6..e2b836c98b9 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -30,16 +30,29 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to.resize(block_size); IColumn::Offset offset = 0; - for (size_t row_num = 0; row_num < 
block_size; ++row_num) + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) { - ASTPtr new_query = query->clone(); + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); WriteBufferFromOwnString out; formatAST(*new_query, out, false); auto data = out.str(); size_t data_len = data.size(); + /// AST is too long, will start from the original query. + if (data_len > 500) + { + fuzz_base = query; + continue; + } + IColumn::Offset next_offset = offset + data_len + 1; data_to.resize(next_offset); @@ -49,6 +62,8 @@ ColumnPtr FuzzQuerySource::createColumn() offsets_to[row_num] = next_offset; offset = next_offset; + fuzz_base = new_query; + ++row_num; } return column; @@ -99,52 +114,30 @@ Pipe StorageFuzzQuery::read( return Pipe::unitePipes(std::move(pipes)); } -static constexpr std::array optional_configuration_keys = {"query_str", "random_seed"}; - -void StorageFuzzQuery::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection( - collection, - std::unordered_set(), - std::unordered_set(optional_configuration_keys.begin(), optional_configuration_keys.end())); - - if (collection.has("query")) - configuration.query = collection.get("query"); - - if (collection.has("random_seed")) - configuration.random_seed = collection.get("random_seed"); -} - StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageFuzzQuery::Configuration configuration{}; - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + // Supported signatures: + // + // FuzzQuery('query') + // FuzzQuery('query', 'random_seed') + if (engine_args.empty() || engine_args.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: 
query, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() == 2) { - StorageFuzzQuery::processNamedCollectionResult(configuration, *named_collection); + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } - else - { - // Supported signatures: - // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); - configuration.query = std::move(first_arg); - - if (engine_args.size() == 2) - { - const auto & literal = engine_args[1]->as(); - if (!literal.value.isNull()) - configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); - } - } return configuration; } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 47142a81f16..e948d0b2acf 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -35,8 +35,6 @@ public: size_t max_block_size, size_t num_streams) override; - static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); - static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); private: @@ -54,8 +52,8 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) + , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ 
nullptr) { - fuzzer.fuzz_rand = config_.random_seed; } String getName() const override { return "FuzzQuery"; } diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index d598037127f..c5b92291207 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,11 @@ SELECT 1 SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT\n item_id,\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], toNullable(\'Array(String)\')) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], 
\'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(materialize(toLowCardinality(3)))\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString(count(), (number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC NULLS FIRST,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n [toString((number % 2) * 2)],\n CAST([toString(number % 2)], toNullable(\'Array(LowCardinality(String))\')) AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE (number % 2) * toUInt128(2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC -SELECT\n toString((number % 2) * 2),\n *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM 
numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n 
FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST +SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC From 4e1f98ee7b78d48362a6788109658c8de859abd5 Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:53:28 +0200 Subject: [PATCH 033/273] removed cout,cerr from src --- src/Client/ClientBase.h | 2 +- src/Common/QueryFuzzer.h | 3 +-- src/Storages/StorageFuzzQuery.h | 2 +- 3 files changed, 3 
insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index c0188253904..2b05878c176 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. - QueryFuzzer fuzzer; + QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index 3cf0381e044..bf87bdfb24e 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,8 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = &std::cout, std::ostream * debug_stream_ = &std::cerr) + QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index e948d0b2acf..40833190895 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -52,7 +52,7 @@ public: , block_header(std::move(block_header_)) , config(config_) , query(query_) - , fuzzer(config_.random_seed, /* out_stream= */ nullptr, /* debug_stream= */ nullptr) + , fuzzer(config_.random_seed) { } From 3bbf86d34506b74a92ca53e857d4c35b9406a08b Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 14:54:58 +0200 Subject: [PATCH 034/273] update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ee3ef1ae795..9d9bea11b82 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ 
b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1581,6 +1581,7 @@ fuzzBits fuzzJSON fuzzer fuzzers +fuzzQuery gRPC gccMurmurHash gcem From 6329dc812462363ff6edf0388239f6bead2efacd Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 9 Apr 2024 15:05:49 +0200 Subject: [PATCH 035/273] fix --- programs/client/Client.h | 5 ++++- src/Client/ClientBase.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/client/Client.h b/programs/client/Client.h index 11d9dec97b1..122b8e5ab3f 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 2b05878c176..c0188253904 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -298,7 +298,7 @@ protected: bool send_external_tables = false; NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. 
- QueryFuzzer fuzzer{randomSeed(), &std::cout, &std::cerr}; + QueryFuzzer fuzzer; int query_fuzzer_runs = 0; int create_query_fuzzer_runs = 0; From 76415ba3523921de138feb39fca7fbc65b8f6dc5 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 10 Apr 2024 15:27:53 +0200 Subject: [PATCH 036/273] add `explicit`, more stable tests --- src/Common/QueryFuzzer.h | 2 +- .../03031_table_function_fuzzquery.reference | 13 ++----------- .../0_stateless/03031_table_function_fuzzquery.sql | 4 ++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/Common/QueryFuzzer.h b/src/Common/QueryFuzzer.h index bf87bdfb24e..35d088809f2 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -38,7 +38,7 @@ struct ASTWindowDefinition; class QueryFuzzer { public: - QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) : fuzz_rand(fuzz_rand_) , out_stream(out_stream_) , debug_stream(debug_stream_) diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference index c5b92291207..202e4557a33 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -1,11 +1,2 @@ -SELECT 1 -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE 
toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(materialize(toLowCardinality(3)))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON 
l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 ASC NULLS LAST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(toLowCardinality(3))\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString((number % 2) * 2)]) AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 ASC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT CAST(\'Array(LowCardinality(String))\') AS item_id\n FROM numbers(3)\n WHERE toString(number % 2)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\'),\n CAST([toString(multiply(number % 2, item_id, 2))]) AS item_id\n FROM numbers(3)\n WHERE \'Array(LowCardinality(String))\'\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 DESC NULLS FIRST -SELECT *\nFROM\n(\n SELECT\n CAST([toString(number % 2)], \'Array(LowCardinality(String))\') AS item_id,\n count()\n FROM numbers(3)\n GROUP BY item_id\n WITH TOTALS\n) AS l\nFULL OUTER JOIN\n(\n SELECT CAST([toString((number % 2) * 2)], \'Array(String)\') AS item_id\n FROM numbers(3)\n) AS r ON l.item_id = r.item_id\nORDER BY\n 1 ASC,\n 2 DESC,\n 3 ASC +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5f5bb4b23e4..5821e2e5111 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 1; +SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10; +', 8956) LIMIT 10 FORMAT NULL; From 
4df3cf3b151f407c4daaa7f8df80a6fddb836280 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 11 Apr 2024 16:57:45 +0200 Subject: [PATCH 037/273] Update StorageFuzzQuery.h --- src/Storages/StorageFuzzQuery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 40833190895..3ae506fdfb8 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -17,7 +17,7 @@ class StorageFuzzQuery final : public IStorage public: struct Configuration : public StatelessTableEngineConfiguration { - String query = ""; + String query; UInt64 random_seed = randomSeed(); }; From 562d76ccab6ffc53c9059139fead3b2e41ff4725 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 12 Apr 2024 11:19:44 +0200 Subject: [PATCH 038/273] Update StorageFuzzQuery.cpp --- src/Storages/StorageFuzzQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index e2b836c98b9..5e29a04427b 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -105,7 +105,7 @@ Pipe StorageFuzzQuery::read( const char * begin = config.query.data(); const char * end = begin + config.query.size(); - ParserQuery parser(end, 0); + ParserQuery parser(end, false); auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (UInt64 i = 0; i < num_streams; ++i) From 9ed3acce8223485798b22bcffcd7bd8d595cd025 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 10 May 2024 18:22:23 +0000 Subject: [PATCH 039/273] refactoring near azure blob storage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 96 +++--- src/Backups/BackupIO_AzureBlobStorage.h | 15 +- .../registerBackupEngineAzureBlobStorage.cpp | 66 ++-- src/Core/Settings.h | 31 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 4 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- 
.../AzureBlobStorage/AzureBlobStorageAuth.h | 58 ---- ...ageAuth.cpp => AzureBlobStorageCommon.cpp} | 276 +++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 139 ++++++++ .../AzureBlobStorage/AzureObjectStorage.cpp | 37 ++- .../AzureBlobStorage/AzureObjectStorage.h | 75 +---- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../ObjectStorages/ObjectStorageFactory.cpp | 17 +- .../copyAzureBlobStorageFile.cpp | 9 +- .../copyAzureBlobStorageFile.h | 4 +- src/Storages/StorageAzureBlob.cpp | 308 +++++------------- src/Storages/StorageAzureBlob.h | 26 +- src/Storages/StorageAzureBlobCluster.cpp | 5 +- .../TableFunctionAzureBlobStorage.cpp | 65 ++-- .../TableFunctionAzureBlobStorageCluster.cpp | 17 +- 21 files changed, 604 insertions(+), 650 deletions(-) delete mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h rename src/Disks/ObjectStorages/AzureBlobStorage/{AzureBlobStorageAuth.cpp => AzureBlobStorageCommon.cpp} (53%) create mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a3998431674..3f60ed5c0b4 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,21 +31,21 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, 
false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -53,20 +54,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -81,23 +82,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & 
path_in_backup, && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -115,22 +116,25 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, 
ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.endpoint.container_name, false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + connection_params.endpoint.container_name); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } @@ -144,18 +148,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. 
- if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) + if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2) { LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName()); copyAzureBlobStorageFile( src_disk->getObjectStorage()->getAzureBlobStorageClient(), client, - /* src_container */ blob_path[1], - /* src_path */ blob_path[0], + /* src_container */ src_blob_path[1], + /* src_path */ src_blob_path[0], start_pos, length, - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); @@ -173,11 +177,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St copyAzureBlobStorageFile( client, client, - configuration.container, - fs::path(configuration.blob_path)/ source, + connection_params.endpoint.container_name, + fs::path(blob_path)/ source, 0, size, - /* dest_container */ configuration.container, + /* dest_container */ connection_params.endpoint.container_name, /* dest_path */ destination, settings, read_settings, @@ -186,21 +190,29 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, + start_pos, + length, + client, + connection_params.endpoint.container_name, + fs::path(blob_path) / path_in_backup, + settings, + 
threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), + "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; bool BackupWriterAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; RelativePathsWithMetadata children; object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) @@ -210,7 +222,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); @@ -218,7 +230,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, @@ -230,7 +242,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -239,7 +251,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - 
objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -249,7 +261,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index f0b9aace4d4..0829c3258c9 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,5 +1,6 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -16,7 +17,7 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -29,15 +30,16 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const 
ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + BackupWriterAzureBlobStorage(const AzureBlobStorage::ConnectionParams & connection_params_, const String & blob_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -58,9 +60,10 @@ private: void removeFilesBatch(const Strings & file_names); const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 1b9545fc455..6974d16e2f6 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureBlob::Configuration configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,54 +62,41 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - 
configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.blob_path = args[0].safeGet(); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, 
/*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -116,12 +106,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + if (hasRegisteredArchiveFileExtension(blob_path)) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -135,7 +125,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(configuration, + auto reader = std::make_shared( + connection_params, + blob_path, params.read_settings, params.write_settings, params.context); @@ -150,7 +142,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } else { - auto writer = std::make_shared(configuration, + auto writer = std::make_shared( + 
connection_params, + blob_path, params.read_settings, params.write_settings, params.context, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 04029983d84..e09fef794d7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -79,23 +79,13 @@ class IColumn; M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ - M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ - M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ - M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ - 
M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ - M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ - M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ - M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ - M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ - M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ @@ -105,20 +95,33 @@ class IColumn; M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. 
By default (0) equals to `s3_max_put_rps`", 0) \ M(UInt64, s3_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_use_adaptive_timeouts, true, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ - M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ - M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ - M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ - M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \ + M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ + M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ + M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited.", 0) \ + M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ + M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ + M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ + M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ + M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. 
The value can be overwritten per table with table settings", 0) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 2c90e3a9003..cadae33e23e 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -26,7 +26,7 @@ struct WriteBufferFromAzureBlobStorage::PartData std::string block_id; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -44,7 +44,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 96ba6acefff..f47ba92beab 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if 
USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp similarity index 53% rename from src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp rename to src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a535b007541..76054efff19 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,4 +1,8 @@ -#include +#include +#include +#include +#include +#include #if USE_AZURE_BLOB_STORAGE @@ -7,13 +11,9 @@ #include #include #include -#include #include #include -using namespace Azure::Storage::Blobs; - - namespace DB { @@ -22,8 +22,10 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } 
+namespace AzureBlobStorage +{ -void validateStorageAccountUrl(const String & storage_account_url) +static void validateStorageAccountUrl(const String & storage_account_url) { const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); @@ -33,8 +35,7 @@ void validateStorageAccountUrl(const String & storage_account_url) "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); } - -void validateContainerName(const String & container_name) +static void validateContainerName(const String & container_name) { auto len = container_name.length(); if (len < 3 || len > 64) @@ -50,13 +51,51 @@ void validateContainerName(const String & container_name) container_name_pattern_str, container_name); } +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return 
std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { String storage_url; String account_name; String container_name; String prefix; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -71,48 +110,48 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (endpoint_contains_account_name) { - size_t acc_pos_begin = endpoint.find('/', pos+2); + size_t acc_pos_begin = endpoint.find('/', pos + 2); if (acc_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); if (acc_pos_end == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); - size_t cont_pos_end = endpoint.find('/', acc_pos_end+1); + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(acc_pos_end+1); + container_name = endpoint.substr(acc_pos_end + 1); } } else { - size_t cont_pos_begin = endpoint.find('/', pos+2); 
+ size_t cont_pos_begin = endpoint.find('/', pos + 2); if (cont_pos_begin == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); if (cont_pos_end != std::string::npos) { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); } else { - container_name = endpoint.substr(cont_pos_begin+1); + container_name = endpoint.substr(cont_pos_begin + 1); } } } @@ -132,122 +171,117 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr if (!container_name.empty()) validateContainerName(container_name); + std::optional container_already_exists {}; if (config.has(config_prefix + ".container_already_exists")) container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; + + return {storage_url, account_name, container_name, prefix, "", container_already_exists}; } - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) { - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} + endpoint.container_name = 
+ container_name; -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if (config.has(config_prefix + ".connection_string")) - connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + if (isConnectionString(url)) { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; } - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); - } + size_t pos = url.find('?'); - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); + /// If connection_url does not have '?', then it's not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method =
std::make_shared(); + } } -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) { - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. 
Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); try { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); } catch (const Azure::Storage::StorageException & e) { /// If container_already_exists is not set (in config), ignore already exists error. 
/// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); throw; } } -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - std::unique_ptr settings = std::make_unique(); + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + 
client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings_ptr = std::make_unique(); + + settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings_ptr; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); @@ -262,10 +296,28 @@ std::unique_ptr getAzureBlobStorageSettings(const Po settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", 
context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); + settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); + settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); + settings->sdk_retry_max_backoff_ms = config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + } return settings; } } +} + #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..7e716adf4d0 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include "base/strong_typedef.h" +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + 
uint64_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + int list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + bool use_native_copy = false; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; + + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" 
+ sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 36225b13ee8..d0f39beb3ca 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -105,7 +105,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_) : name(name_) @@ -397,20 +397,37 @@ void AzureObjectStorage::copyObject( /// NOLINT void AzureObjectStorage::applyNewSettings(const 
Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + settings.set(std::move(new_settings)); - /// We don't update client + client.set(std::move(new_client)); } std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), object_namespace); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h 
b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f52ab803012..d8440453852 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,4 +1,5 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,6 +8,7 @@ #include #include #include +#include namespace Poco { @@ -16,70 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - 
size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_); @@ -156,12 +103,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override{ return client.get(); } private: using SharedAzureClientPtr = std::shared_ptr; @@ -169,8 +112,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix LoggerPtr log; diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h 
b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 961c2709efc..60818933dec 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -120,7 +120,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index eae31af9d44..27a58053752 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -238,7 +238,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 7b949db268b..f8c1c564191 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -13,7 +13,7 @@ #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include @@ -293,12 +293,19 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = 
AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix); }; factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..d648796b5df 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -44,7 +44,7 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -69,7 +69,7 @@ namespace size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -265,7 +265,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { UploadHelper 
helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -282,11 +282,10 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { ProfileEvents::increment(ProfileEvents::AzureCopyObject); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 6ad54923ab5..73b91191b96 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -29,7 +29,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -46,7 +46,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f2e2833dad4..d8fd5cbf05a 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,6 @@ +#include #include +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -95,43 +97,62 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); } -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const 
NamedCollection & collection) +void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } + connection_url = collection.get("connection_string"); + else if (collection.has("storage_account_url")) + connection_url = collection.get("storage_account_url"); - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); + container_name = collection.get("container"); configuration.blob_path = collection.get("blob_path"); if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); + account_name = collection.get("account_name"); if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); + account_key = collection.get("account_key"); configuration.structure = collection.getOrDefault("structure", "auto"); configuration.format = collection.getOrDefault("format", configuration.format); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + configuration.connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, local_context); } +AzureBlobStorage::ConnectionParams StorageAzureBlob::getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + 
const ContextPtr & local_context) +{ + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); + + if (account_name && account_key) + { + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + else + { + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); + } + + return connection_params; +} StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) { @@ -144,8 +165,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - processNamedCollectionResult(configuration, *named_collection); - + processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; if (configuration.format == "auto") @@ -164,11 +184,12 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = 
checkAndGetLiteralArgument(engine_args[2], "blob_path"); - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; auto is_format_arg = [] (const std::string & s) -> bool { @@ -198,8 +219,8 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -211,12 +232,13 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; } } @@ -229,17 +251,20 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } } 
configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -247,18 +272,6 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine return configuration; } - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - void registerStorageAzureBlob(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) @@ -268,7 +281,8 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, /*readonly=*/ false); + // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. 
@@ -299,11 +313,11 @@ void registerStorageAzureBlob(StorageFactory & factory) if (args.storage_def->partition_by) partition_by = args.storage_def->partition_by->clone(); - auto settings = StorageAzureBlob::createSettings(args.getContext()); + auto azure_settings = AzureBlobStorage::getRequestSettings(args.getContext()->getSettingsRef()); return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), + configuration, + std::make_unique("AzureBlobStorage", std::move(client), std::move(azure_settings), configuration.connection_params.getContainer()), args.getContext(), args.table_id, args.columns, @@ -321,177 +335,6 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if 
(!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? 
"" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = 
Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - StorageAzureBlob::StorageAzureBlob( const Configuration & configuration_, std::unique_ptr && object_storage_, @@ -513,7 +356,8 @@ StorageAzureBlob::StorageAzureBlob( { if (configuration.format != "auto") FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); + + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -850,13 +694,13 @@ void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) { /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, + storage->object_storage.get(), configuration.connection_params.getContainer(), configuration.blobs_paths, predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); } } @@ -879,8 +723,8 @@ void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, cons max_block_size, configuration.compression_method, storage->object_storage.get(), - configuration.container, - configuration.connection_url, + configuration.connection_params.getContainer(), + configuration.connection_params.endpoint.storage_account_url, iterator_wrapper, need_only_count)); } @@ -1455,7 +1299,8 @@ namespace if 
(!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1466,7 +1311,8 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; + const auto & params = configuration.connection_params; + String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1477,7 +1323,9 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + Strings sources; sources.reserve(read_keys.size()); std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); @@ -1520,8 +1368,10 @@ namespace return std::nullopt; }; - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; + const auto & params = configuration.connection_params; + auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); String source = 
host_and_bucket + '/' + it->relative_path; + if (format) { auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); @@ -1573,12 +1423,12 @@ std::pair StorageAzureBlob::getTableStructureAndForm if (configuration.withGlobs()) { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); } else { file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); + object_storage, configuration.connection_params.getContainer(), configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); } ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 20e7f4a6c90..affa02928b6 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,5 +1,7 @@ #pragma once +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -37,21 +39,13 @@ public: bool withWildcard() const { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + static constexpr auto PARTITION_ID_WILDCARD = "{_partition_id}"; return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; } - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; + AzureBlobStorage::ConnectionParams connection_params; }; StorageAzureBlob( @@ -67,16 +61,10 @@ public: ASTPtr partition_by_); static 
StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); + static AzureBlobStorage::ConnectionParams getConnectionParams(const String & connection_url, const String & container_name, const std::optional & account_name, const std::optional & account_key, const ContextPtr & local_context); + static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection, const ContextPtr & local_context); - static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } + String getName() const override { return name; } void read( QueryPlan & query_plan, diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a80d121567a..6f6ae8763fd 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -1,4 +1,5 @@ #include "Storages/StorageAzureBlobCluster.h" +#include #include "config.h" @@ -41,7 +42,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( , configuration{configuration_} , object_storage(std::move(object_storage_)) { - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(configuration_.connection_params.getConnectionURL())); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -79,7 +80,7 @@ void StorageAzureBlobCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, cons RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const 
ContextPtr & context) const { auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, + object_storage.get(), configuration.connection_params.getContainer(), configuration.blob_path, predicate, getVirtualsList(), context, nullptr); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 275cd2a9cbb..c471a72d8c7 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -35,16 +35,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { /// Supported signatures: @@ -54,7 +44,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); + StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection, local_context); configuration.blobs_paths = {configuration.blob_path}; @@ -74,14 +64,14 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const std::unordered_map engine_args_to_idx; - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String 
container = checkAndGetLiteralArgument(engine_args[1], "container"); configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; + std::optional account_name; + std::optional account_key; + + auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -105,8 +95,8 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); } } else if (engine_args.size() == 6) @@ -120,8 +110,9 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const } else { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); if (is_format_arg(sixth_arg)) configuration.format = sixth_arg; @@ -132,28 +123,33 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, 
"Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } else if (engine_args.size() == 8) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (!is_format_arg(sixth_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } configuration.blobs_paths = {configuration.blob_path}; + configuration.connection_params = StorageAzureBlob::getConnectionParams(connection_url, container, account_name, account_key, local_context); if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); @@ -330,12 +326,19 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + + auto object_storage = std::make_unique( + 
"AzureBlobStorageTableFunction", + std::move(client), + std::move(settings), + configuration.connection_params.getContainer()); + if (configuration.format == "auto") return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); } @@ -354,8 +357,8 @@ std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBefo StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); ColumnsDescription columns; if (configuration.structure != "auto") @@ -365,7 +368,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 04dddca7672..fb311c74657 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,3 +1,4 @@ +#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -31,30 +32,30 
@@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( columns = structure_hint; } - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(configuration.connection_params, !is_insert_query); if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), context, StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); + /*comment=*/ String{}, + /*format_settings=*/ std::nullopt, /// No format_settings + /*distributed_processing=*/ true, + /*partition_by=*/ nullptr); } else { storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.connection_params.getContainer()), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, From de3d95a7f05156330dc3cbad3dee7265a027b074 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 11 May 2024 21:00:08 +0000 Subject: [PATCH 040/273] fix style check --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a04d7f54884..1776163688e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,8 +117,8 @@ class IColumn; 
M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ From 9ef86e948ecb1408d6ce8df2e2602584d591252c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:07:28 +0000 Subject: [PATCH 041/273] fix tests --- src/Backups/BackupIO_AzureBlobStorage.cpp | 1 - src/Backups/BackupIO_AzureBlobStorage.h | 5 +---- .../registerBackupEngineAzureBlobStorage.cpp | 3 +-- .../AzureBlobStorageCommon.cpp | 4 ---- .../AzureBlobStorage/AzureBlobStorageCommon.h | 6 +----- src/Storages/StorageAzureBlob.cpp | 19 +++++++------------ src/Storages/StorageAzureBlob.h | 3 --- .../TableFunctionAzureBlobStorageCluster.cpp | 4 ---- .../test_storage_azure_blob_storage/test.py | 2 +- 9 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 3f60ed5c0b4..6ae67ad5dfc 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ 
b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 0829c3258c9..8f0a6e8fb5d 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,13 +1,10 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 6974d16e2f6..98920d80662 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #include @@ -6,7 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index 76054efff19..a39cc89b93b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -1,8 +1,4 @@ -#include -#include -#include #include -#include #if USE_AZURE_BLOB_STORAGE diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 7e716adf4d0..5f9f280ad4a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -1,8 +1,4 @@ #pragma once - -#include -#include -#include "base/strong_typedef.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -15,10 +11,10 @@ #include #include 
-#include #include #include #include +#include namespace DB { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 2341b8dc94e..a82de72af6d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,6 +1,4 @@ -#include #include -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #if USE_AZURE_BLOB_STORAGE #include @@ -44,6 +42,7 @@ #include #include +#include #include @@ -1307,8 +1306,7 @@ namespace if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); } @@ -1319,8 +1317,7 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) return; - const auto & params = configuration.connection_params; - String source = fs::path(params.getConnectionURL()) / params.getContainer() / current_path_with_metadata.relative_path; + String source = fs::path(configuration.connection_params.endpoint.getEndpoint()) / current_path_with_metadata.relative_path; auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); } @@ -1331,12 +1328,11 @@ namespace || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) return; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); Strings 
sources; sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); + std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem) { return endpoint / elem.relative_path; }); auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } @@ -1376,9 +1372,8 @@ namespace return std::nullopt; }; - const auto & params = configuration.connection_params; - auto host_and_bucket = params.getConnectionURL() + '/' + params.getContainer(); - String source = host_and_bucket + '/' + it->relative_path; + auto endpoint = fs::path(configuration.connection_params.endpoint.getEndpoint()); + String source = endpoint / it->relative_path; if (format) { diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 936f32c3cb0..396934b4212 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -1,7 +1,4 @@ #pragma once - -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" -#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index fb311c74657..d72735bb47b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -1,4 +1,3 @@ -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -7,11 +6,8 @@ #include #include #include - #include "registerTableFunctions.h" -#include - namespace DB { diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py 
index 78aaf26a2a7..c3204808d6f 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -789,7 +789,7 @@ def test_read_subcolumns(cluster): def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( - f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', " + f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" From d7de2ae0c9c37e9079ec575dc0a7ffeae5394206 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 13 May 2024 16:31:13 +0000 Subject: [PATCH 042/273] remove optimization for old analyzer --- .../RewriteAggregateFunctionWithIfPass.cpp | 26 +-- .../RewriteFunctionToSubcolumnVisitor.cpp | 216 ------------------ .../RewriteFunctionToSubcolumnVisitor.h | 52 ----- src/Interpreters/TreeOptimizer.cpp | 61 ----- .../01872_functions_to_subcolumns.reference | 47 ---- .../01872_functions_to_subcolumns.sql | 41 ---- .../0_stateless/02115_map_contains.reference | 4 - .../0_stateless/02115_map_contains.sql | 12 - .../0_stateless/02116_tuple_element.reference | 25 -- .../0_stateless/02116_tuple_element.sql | 42 ---- ...tions_to_subcolumns_column_names.reference | 5 - ...1_functions_to_subcolumns_column_names.sql | 6 - ...2971_functions_to_subcolumns_map.reference | 12 - .../02971_functions_to_subcolumns_map.sql | 12 - ..._functions_to_subcolumns_variant.reference | 4 - .../02971_functions_to_subcolumns_variant.sql | 6 - .../03003_functions_to_subcolumns_final.sql | 1 + 17 files changed, 2 insertions(+), 570 deletions(-) delete mode 100644 src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp delete mode 100644 
src/Interpreters/RewriteFunctionToSubcolumnVisitor.h delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.reference delete mode 100644 tests/queries/0_stateless/01872_functions_to_subcolumns.sql delete mode 100644 tests/queries/0_stateless/02115_map_contains.reference delete mode 100644 tests/queries/0_stateless/02115_map_contains.sql delete mode 100644 tests/queries/0_stateless/02116_tuple_element.reference delete mode 100644 tests/queries/0_stateless/02116_tuple_element.sql diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 365bc28431a..58045c935aa 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -12,10 +12,7 @@ #include #include -<<<<<<< HEAD -======= #include ->>>>>>> upstream/master #include namespace DB @@ -102,16 +99,8 @@ public: FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns())); new_arguments[1] = std::move(not_function); -<<<<<<< HEAD - function_arguments_nodes.resize(2); - function_arguments_nodes[0] = std::move(if_arguments_nodes[2]); - function_arguments_nodes[1] = std::move(not_function); - resolveAsAggregateFunctionWithIf(*function_node); -======= function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf( - *function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()}); ->>>>>>> upstream/master + resolveAsAggregateFunctionWithIf(*function_node); } } } @@ -120,21 +109,8 @@ private: static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) { auto result_type = function_node.getResultType(); -<<<<<<< HEAD const auto * suffix = result_type->isNullable() ? 
"OrNullIf" : "If"; resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); -======= - - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( - function_node.getFunctionName() + "If", - function_node.getNullsAction(), - argument_types, - function_node.getAggregateFunction()->getParameters(), - properties); - - function_node.resolveAsAggregateFunction(std::move(aggregate_function)); ->>>>>>> upstream/master } }; diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp deleted file mode 100644 index b1c79d4ecb7..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ /dev/null @@ -1,216 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -ASTPtr transformToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - return std::make_shared(Nested::concatenateName(name_in_storage, subcolumn_name)); -} - -ASTPtr transformEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("equals", ast, std::make_shared(0u)); -} - -ASTPtr transformNotEmptyToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("notEquals", ast, std::make_shared(0u)); -} - -ASTPtr transformIsNotNullToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("not", ast); -} - -ASTPtr transformCountNullableToSubcolumn(const String & name_in_storage, const String & subcolumn_name) -{ - auto ast = transformToSubcolumn(name_in_storage, subcolumn_name); - return makeASTFunction("sum", makeASTFunction("not", 
ast)); -} - -const std::unordered_map, String, decltype(&transformToSubcolumn)>> unary_function_to_subcolumn = -{ - {"length", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformToSubcolumn}}, - {"empty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformEmptyToSubcolumn}}, - {"notEmpty", {{TypeIndex::Array, TypeIndex::Map}, "size0", transformNotEmptyToSubcolumn}}, - {"isNull", {{TypeIndex::Nullable}, "null", transformToSubcolumn}}, - {"isNotNull", {{TypeIndex::Nullable}, "null", transformIsNotNullToSubcolumn}}, - {"count", {{TypeIndex::Nullable}, "null", transformCountNullableToSubcolumn}}, - {"mapKeys", {{TypeIndex::Map}, "keys", transformToSubcolumn}}, - {"mapValues", {{TypeIndex::Map}, "values", transformToSubcolumn}}, -}; - -std::optional getColumnFromArgumentsToOptimize( - const ASTs & arguments, - const StorageMetadataPtr & metadata_snapshot) -{ - if (arguments.empty() || arguments.size() > 2) - return {}; - - const auto * identifier = arguments[0]->as(); - if (!identifier) - return {}; - - const auto & columns = metadata_snapshot->getColumns(); - const auto & name_in_storage = identifier->name(); - - if (!columns.has(name_in_storage)) - return {}; - - const auto & column_type = columns.get(name_in_storage).type; - if (column_type->hasDynamicSubcolumns()) - return {}; - - return NameAndTypePair{name_in_storage, column_type}; -} - -} - -void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTPtr & ast, Data & data) -{ - if (const auto * identifier = ast->as()) - { - ++data.indentifiers_count[identifier->name()]; - return; - } - - if (const auto * function = ast->as()) - { - visit(*function, data); - return; - } -} - -void RewriteFunctionToSubcolumnFirstPassMatcher::visit(const ASTFunction & function, Data & data) -{ - const auto & arguments = function.arguments->children; - auto column = getColumnFromArgumentsToOptimize(arguments, data.metadata_snapshot); - if (!column) - return; - - auto column_type_id = column->type->getTypeId(); - - if 
(arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it == unary_function_to_subcolumn.end()) - return; - - const auto & expected_types_id = std::get<0>(it->second); - if (expected_types_id.contains(column_type_id)) - ++data.optimized_identifiers_count[column->name]; - } - else if (arguments.size() == 2) - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64 || value_type == Field::Types::String) - ++data.optimized_identifiers_count[column->name]; - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const auto * literal = arguments[1]->as(); - if (literal && literal->value.getType() == Field::Types::String) - ++data.optimized_identifiers_count[column->name]; - } - else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) - { - ++data.optimized_identifiers_count[column->name]; - } - } -} - -void RewriteFunctionToSubcolumnSecondPassData::visit(ASTFunction & function, ASTPtr & ast) const -{ - const auto & arguments = function.arguments->children; - auto column = getColumnFromArgumentsToOptimize(arguments, metadata_snapshot); - if (!column) - return; - - auto column_type_id = column->type->getTypeId(); - auto alias = function.getAliasOrColumnName(); - - if (arguments.size() == 1) - { - auto it = unary_function_to_subcolumn.find(function.name); - if (it == unary_function_to_subcolumn.end()) - return; - - const auto & [expected_types_id, subcolumn_name, transformer] = it->second; - if (!expected_types_id.contains(column_type_id)) - return; - - ast = transformer(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (arguments.size() == 2) - { - if (function.name == "tupleElement" && column_type_id == TypeIndex::Tuple) - { - const auto * literal = arguments[1]->as(); - 
if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type == Field::Types::UInt64) - { - const auto & type_tuple = assert_cast(*column->type); - auto index = literal->value.get(); - subcolumn_name = type_tuple.getNameByPosition(index); - } - else if (value_type == Field::Types::String) - { - subcolumn_name = literal->value.get(); - } - else - { - return; - } - - ast = transformToSubcolumn(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "variantElement" && column_type_id == TypeIndex::Variant) - { - const auto * literal = arguments[1]->as(); - if (!literal) - return; - - String subcolumn_name; - auto value_type = literal->value.getType(); - if (value_type != Field::Types::String) - return; - - subcolumn_name = literal->value.get(); - ast = transformToSubcolumn(column->name, subcolumn_name); - ast->setAlias(alias); - } - else if (function.name == "mapContains" && column_type_id == TypeIndex::Map) - { - auto subcolumn = transformToSubcolumn(column->name, "keys"); - ast = makeASTFunction("has", subcolumn, arguments[1]); - ast->setAlias(alias); - } - } -} - -} diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h deleted file mode 100644 index 08eb6e27c52..00000000000 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTFunction; -class ASTIdentifier; - -/// Collects info about identifiers to select columns to optimize to subcolumns. 
-class RewriteFunctionToSubcolumnFirstPassMatcher -{ -public: - struct Data - { - explicit Data(StorageMetadataPtr metadata_snapshot_) : metadata_snapshot(std::move(metadata_snapshot_)) {} - - StorageMetadataPtr metadata_snapshot; - std::unordered_map indentifiers_count; - std::unordered_map optimized_identifiers_count; - }; - - static void visit(const ASTPtr & ast, Data & data); - static void visit(const ASTFunction & function, Data & data); - static bool needChildVisit(ASTPtr & , ASTPtr &) { return true; } -}; - -using RewriteFunctionToSubcolumnFirstPassVisitor = InDepthNodeVisitor; - -/// Rewrites functions to subcolumns, if possible, to reduce amount of read data. -/// E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' -class RewriteFunctionToSubcolumnSecondPassData -{ -public: - using TypeToVisit = ASTFunction; - void visit(ASTFunction & function, ASTPtr & ast) const; - - RewriteFunctionToSubcolumnSecondPassData(StorageMetadataPtr metadata_snapshot_, NameSet identifiers_to_optimize_) - : metadata_snapshot(std::move(metadata_snapshot_)), identifiers_to_optimize(std::move(identifiers_to_optimize_)) - { - } - - StorageMetadataPtr metadata_snapshot; - NameSet identifiers_to_optimize; -}; - -using RewriteFunctionToSubcolumnSecondPassMatcher = OneTypeMatcher; -using RewriteFunctionToSubcolumnSecondPassVisitor = InDepthNodeVisitor; - -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index d01a922bfd0..b88d75cd5a2 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -564,63 +563,6 @@ void transformIfStringsIntoEnum(ASTPtr & query) ConvertStringsToEnumVisitor(convert_data).visit(query); } -void optimizeFunctionsToSubcolumns(ASTPtr & query, const TreeRewriterResult & result) -{ - if (!result.storage || !result.storage->supportsOptimizationToSubcolumns() || !result.storage_snapshot) - return; - - const 
auto & metadata_snapshot = result.storage_snapshot->metadata; - const auto & select_query = assert_cast(*query); - - /// For queries with FINAL converting function to subcolumn may alter - /// special merging algorithms and produce wrong result of query. - if (select_query.final()) - return; - - NameSet all_key_columns; - - const auto & primary_key_columns = result.storage_snapshot->metadata->getColumnsRequiredForPrimaryKey(); - all_key_columns.insert(primary_key_columns.begin(), primary_key_columns.end()); - - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); - all_key_columns.insert(partition_key_columns.begin(), partition_key_columns.end()); - - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - { - const auto & index_columns = index.expression->getRequiredColumns(); - all_key_columns.insert(index_columns.begin(), index_columns.end()); - } - - /// Do not optimize if full column is requested in other context. - /// It doesn't make sense because it doesn't reduce amount of read data - /// and optimized functions are not computation heavy. But introducing - /// new identifier complicates query analysis and may break it. - /// - /// E.g. query: - /// SELECT n FROM table GROUP BY n HAVING isNotNull(n) - /// may be optimized to incorrect query: - /// SELECT n FROM table GROUP BY n HAVING not(n.null) - /// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys) - /// - /// Do not optimize index columns (primary, min-max, secondary), - /// because otherwise analysis of indexes may be broken. - /// TODO: handle subcolumns in index analysis. 
- - RewriteFunctionToSubcolumnFirstPassVisitor::Data data(metadata_snapshot); - RewriteFunctionToSubcolumnFirstPassVisitor(data).visit(query); - - NameSet identifiers_to_optimize; - for (const auto & [identifier, count] : data.optimized_identifiers_count) - if (!all_key_columns.contains(identifier) && data.indentifiers_count[identifier] == count) - identifiers_to_optimize.insert(identifier); - - if (identifiers_to_optimize.empty()) - return; - - RewriteFunctionToSubcolumnSecondPassVisitor::Data rewrite_data(metadata_snapshot, identifiers_to_optimize); - RewriteFunctionToSubcolumnSecondPassVisitor(rewrite_data).visit(query); -} - void optimizeOrLikeChain(ASTPtr & query) { ConvertFunctionOrLikeVisitor::Data data = {}; @@ -685,9 +627,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (!select_query) throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); - if (settings.optimize_functions_to_subcolumns) - optimizeFunctionsToSubcolumns(query, result); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns.reference deleted file mode 100644 index 8c4017d6030..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.reference +++ /dev/null @@ -1,47 +0,0 @@ -0 0 1 -0 1 0 -SELECT - id IS NULL, - `n.null` AS `isNull(n)`, - NOT `n.null` AS `isNotNull(n)` -FROM t_func_to_subcolumns -3 0 1 0 -0 1 0 \N -SELECT - `arr.size0` AS `length(arr)`, - `arr.size0` = 0 AS `empty(arr)`, - `arr.size0` != 0 AS `notEmpty(arr)`, - empty(n) -FROM t_func_to_subcolumns -['foo','bar'] [1,2] -[] [] -SELECT - `m.keys` AS `mapKeys(m)`, - `m.values` AS `mapValues(m)` -FROM t_func_to_subcolumns -1 -SELECT sum(NOT `n.null`) AS `count(n)` -FROM t_func_to_subcolumns -2 -SELECT count(id) 
-FROM t_func_to_subcolumns -1 0 0 -2 1 0 -3 0 0 -SELECT - id, - `n.null` AS `isNull(n)`, - right.n IS NULL -FROM t_func_to_subcolumns AS left -ALL FULL OUTER JOIN -( - SELECT - 1 AS id, - \'qqq\' AS n - UNION ALL - SELECT - 3 AS id, - \'www\' -) AS right USING (id) -0 10 -0 20 diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql b/tests/queries/0_stateless/01872_functions_to_subcolumns.sql deleted file mode 100644 index 45f83bf20e5..00000000000 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns.sql +++ /dev/null @@ -1,41 +0,0 @@ -DROP TABLE IF EXISTS t_func_to_subcolumns; - -SET optimize_functions_to_subcolumns = 1; - -CREATE TABLE t_func_to_subcolumns (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) -ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_func_to_subcolumns VALUES (1, [1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); - -SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT id IS NULL, n IS NULL, n IS NOT NULL FROM t_func_to_subcolumns; - -SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT length(arr), empty(arr), notEmpty(arr), empty(n) FROM t_func_to_subcolumns; - -SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_func_to_subcolumns; - -SELECT count(n) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(n) FROM t_func_to_subcolumns; - -SELECT count(id) FROM t_func_to_subcolumns; -EXPLAIN SYNTAX SELECT count(id) FROM t_func_to_subcolumns; - -SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -EXPLAIN SYNTAX SELECT id, left.n IS NULL, right.n IS NULL FROM t_func_to_subcolumns AS left -FULL JOIN (SELECT 1 AS id, 'qqq' AS n UNION ALL SELECT 3 AS id, 'www') AS right USING(id); - -DROP TABLE 
t_func_to_subcolumns; - -DROP TABLE IF EXISTS t_tuple_null; - -CREATE TABLE t_tuple_null (t Tuple(null UInt32)) ENGINE = MergeTree ORDER BY tuple(); - -INSERT INTO t_tuple_null VALUES ((10)), ((20)); - -SELECT t IS NULL, t.null FROM t_tuple_null; - -DROP TABLE t_tuple_null; diff --git a/tests/queries/0_stateless/02115_map_contains.reference b/tests/queries/0_stateless/02115_map_contains.reference deleted file mode 100644 index e4ae4f951ba..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.reference +++ /dev/null @@ -1,4 +0,0 @@ -SELECT has(`m.keys`, \'a\') AS `mapContains(m, \'a\')` -FROM t_map_contains -1 -0 diff --git a/tests/queries/0_stateless/02115_map_contains.sql b/tests/queries/0_stateless/02115_map_contains.sql deleted file mode 100644 index 3c7f21cb4f1..00000000000 --- a/tests/queries/0_stateless/02115_map_contains.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS t_map_contains; - -CREATE TABLE t_map_contains (m Map(String, UInt32)) ENGINE = Memory; - -INSERT INTO t_map_contains VALUES (map('a', 1, 'b', 2)), (map('c', 3, 'd', 4)); - -SET optimize_functions_to_subcolumns = 1; - -EXPLAIN SYNTAX SELECT mapContains(m, 'a') FROM t_map_contains; -SELECT mapContains(m, 'a') FROM t_map_contains; - -DROP TABLE t_map_contains; diff --git a/tests/queries/0_stateless/02116_tuple_element.reference b/tests/queries/0_stateless/02116_tuple_element.reference deleted file mode 100644 index a8004f5e74c..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.reference +++ /dev/null @@ -1,25 +0,0 @@ -1 -SELECT `t1.a` AS `tupleElement(t1, 1)` -FROM t_tuple_element -a -SELECT `t1.s` AS `tupleElement(t1, 2)` -FROM t_tuple_element -1 -SELECT `t1.a` AS `tupleElement(t1, \'a\')` -FROM t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -2 -SELECT `t2.1` AS `tupleElement(t2, 1)` -FROM t_tuple_element -1 2 -WITH (1, 2) AS t -SELECT - t.1, - t.2 -1 2 -WITH CAST(\'(1, 2)\', \'Tuple(a UInt32, b UInt32)\') AS t -SELECT - t.1, - 
tupleElement(t, \'b\') diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql deleted file mode 100644 index e3a5134f2b2..00000000000 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS t_tuple_element; - -CREATE TABLE t_tuple_element(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; -INSERT INTO t_tuple_element VALUES ((1, 'a'), (2, 'b')); - -SET optimize_functions_to_subcolumns = 1; - -SELECT t1.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t1.1 FROM t_tuple_element; - -SELECT tupleElement(t1, 2) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 2) FROM t_tuple_element; - -SELECT tupleElement(t1, 'a') FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; - -SELECT tupleElement(number, 1) FROM numbers(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -SELECT t2.1 FROM t_tuple_element; -EXPLAIN SYNTAX SELECT t2.1 FROM t_tuple_element; - -SELECT tupleElement(t2, 1) FROM t_tuple_element; -EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; - -SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER } -SELECT tupleElement(t2, 0) FROM 
t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError ARGUMENT_OUT_OF_BOUND, NOT_FOUND_COLUMN_IN_BLOCK } -SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT, NOT_FOUND_COLUMN_IN_BLOCK } - -DROP TABLE t_tuple_element; - -WITH (1, 2) AS t SELECT t.1, t.2; -EXPLAIN SYNTAX WITH (1, 2) AS t SELECT t.1, t.2; - -WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); -EXPLAIN SYNTAX WITH (1, 2)::Tuple(a UInt32, b UInt32) AS t SELECT t.1, tupleElement(t, 'b'); diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference index 03c16267db1..3389ea44074 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.reference @@ -1,8 +1,3 @@ -SELECT - `arr.size0` AS `length(arr)`, - `n.null` AS `isNull(n)` -FROM t_column_names -{"length(arr)":"3","isNull(n)":0} SELECT __table1.`arr.size0` AS `length(arr)`, __table1.`n.null` AS `isNull(n)` diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql index b867148c8ca..48e5232d18b 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_column_names.sql @@ -4,12 +4,6 @@ CREATE TABLE t_column_names (arr Array(UInt64), n Nullable(String)) ENGINE = Mem INSERT INTO t_column_names VALUES ([1, 2, 3], 'foo'); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(arr), isNull(n) FROM t_column_names; -SELECT length(arr), isNull(n) FROM t_column_names FORMAT JSONEachRow; - SET optimize_functions_to_subcolumns = 
1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference index 50f21842ac1..9488291c8ff 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.reference @@ -1,15 +1,3 @@ -SELECT `m.size0` AS `length(m)` -FROM t_func_to_subcolumns_map -2 -1 -SELECT `m.size0` = 0 AS `empty(m)` -FROM t_func_to_subcolumns_map -0 -0 -SELECT `m.size0` != 0 AS `notEmpty(m)` -FROM t_func_to_subcolumns_map -1 -1 SELECT __table1.`m.size0` AS `length(m)` FROM default.t_func_to_subcolumns_map AS __table1 2 diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql index c574e1033c0..e8a752a82d5 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_map.sql @@ -4,18 +4,6 @@ CREATE TABLE t_func_to_subcolumns_map (id UInt64, m Map(String, UInt64)) ENGINE INSERT INTO t_func_to_subcolumns_map VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT length(m) FROM t_func_to_subcolumns_map; -SELECT length(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT empty(m) FROM t_func_to_subcolumns_map; -SELECT empty(m) FROM t_func_to_subcolumns_map; - -EXPLAIN SYNTAX SELECT notEmpty(m) FROM t_func_to_subcolumns_map; -SELECT notEmpty(m) FROM t_func_to_subcolumns_map; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference index 7a52155fc2d..04616738a15 100644 --- 
a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.reference @@ -1,7 +1,3 @@ -SELECT `v.String` AS `variantElement(v, \'String\')` -FROM t_func_to_subcolumns_variant -foo -\N SELECT __table1.`v.String` AS `variantElement(v, \'String\')` FROM default.t_func_to_subcolumns_variant AS __table1 foo diff --git a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql index 1cedd877289..511bcc44514 100644 --- a/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql +++ b/tests/queries/0_stateless/02971_functions_to_subcolumns_variant.sql @@ -6,12 +6,6 @@ CREATE TABLE t_func_to_subcolumns_variant (id UInt64, v Variant(String, UInt64)) INSERT INTO t_func_to_subcolumns_variant VALUES (1, 'foo') (2, 111); -SET optimize_functions_to_subcolumns = 1; -SET allow_experimental_analyzer = 0; - -EXPLAIN SYNTAX SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; -SELECT variantElement(v, 'String') FROM t_func_to_subcolumns_variant; - SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql index 5975347ad09..3fe29139c5f 100644 --- a/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql +++ b/tests/queries/0_stateless/03003_functions_to_subcolumns_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS t_length_1; DROP TABLE IF EXISTS t_length_2; +SET optimize_functions_to_subcolumns = 1; SET allow_experimental_analyzer = 1; SET optimize_on_insert = 0; From 35038f2458f2642700ee685a5c4f43729228d8a0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:23:24 +0000 Subject: [PATCH 043/273] rename parameter --- .../test_backup_restore_azure_blob_storage/test.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 1a1458cb68e..6765a519a6d 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 0a1daa00e0f8b57f73e64a7f9794ce1e7cef2c86 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 May 2024 14:40:38 +0000 Subject: [PATCH 044/273] fix functions with if --- .../Passes/RewriteAggregateFunctionWithIfPass.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 58045c935aa..45f3469b48e 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -71,7 +71,7 @@ public: new_arguments[1] = std::move(if_arguments_nodes[0]); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } else if (first_const_node) @@ -100,18 +100,10 @@ public: new_arguments[1] = std::move(not_function); function_arguments_nodes = std::move(new_arguments); - resolveAsAggregateFunctionWithIf(*function_node); + resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If"); } } } - -private: - static 
inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node) - { - auto result_type = function_node.getResultType(); - const auto * suffix = result_type->isNullable() ? "OrNullIf" : "If"; - resolveAggregateFunctionNodeByName(function_node, function_node.getFunctionName() + suffix); - } }; } From 29250418cb5d666b81043695a1df09e4df94e539 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 17 May 2024 20:43:40 +0000 Subject: [PATCH 045/273] fix backward incompatibility --- .../AzureBlobStorage/AzureBlobStorageCommon.cpp | 15 +++++++++++++-- .../test.py | 4 ++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index a39cc89b93b..11253d25e3d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -92,6 +92,17 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String container_name; String prefix; + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + if (config.has(config_prefix + ".endpoint")) { String endpoint = config.getString(config_prefix + ".endpoint"); @@ -154,13 +165,13 @@ Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const else if (config.has(config_prefix + ".connection_string")) { storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else if (config.has(config_prefix + 
".storage_account_url")) { storage_url = config.getString(config_prefix + ".storage_account_url"); validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); + container_name = get_container_name(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 6765a519a6d..1a1458cb68e 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -30,12 +30,12 @@ def generate_cluster_def(port): DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:{port}/devstoreaccount1; - cont + cont CSV http://azurite1:{port}/devstoreaccount1 - cont + cont CSV devstoreaccount1 Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== From 47c7b7fccf38f9ae3180e1a456262388bd39e129 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 16:01:48 +0100 Subject: [PATCH 046/273] add tests for non replicated mt --- src/Core/Settings.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 18 ++- .../Transforms/buildPushingToViewsChain.cpp | 6 + src/Storages/MergeTree/MergeTreeDataWriter.h | 2 + src/Storages/MergeTree/MergeTreeSink.cpp | 26 ++++ .../MergeTree/MergedBlockOutputStream.cpp | 5 + .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- ..._non_replicated_deduplication_mv.reference | 0 .../03008_non_replicated_deduplication_mv.sql | 93 ++++++++++++++ ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 +++++++++++++++++ ...lision_in_dst_from_different_src.reverence | 0 
...mv_collision_in_dst_from_different_src.sql | 119 ++++++++++++++++++ ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 +++++++++++ 15 files changed, 456 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference create mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c555b5cb208..491e888e3e0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -34,7 +34,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block 
size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 12677c422b8..d1a9ead480e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -306,6 +306,9 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { + LOG_DEBUG(getLogger("InsertQuery"), + "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -465,16 +468,17 @@ BlockIO InterpreterInsertQuery::execute() * to avoid unnecessary squashing. 
*/ + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + Settings new_settings = getContext()->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); if (table->prefersLargeBlocks()) { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); } auto new_context = Context::createCopy(context); @@ -527,6 +531,7 @@ BlockIO InterpreterInsertQuery::execute() /// Deduplication when passing insert_deduplication_token breaks if using more than one thread if (!settings.insert_deduplication_token.toString().empty()) { + /// TODO! 
LOG_DEBUG( getLogger("InsertQuery"), "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); @@ -566,8 +571,13 @@ BlockIO InterpreterInsertQuery::execute() running_group = std::make_shared(getContext()); for (size_t i = 0; i < sink_streams_size; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); } for (size_t i = 0; i < pre_streams_size; ++i) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..66264a46d9d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -223,6 +223,8 @@ std::optional generateViewChain( else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && !insert_settings.insert_deduplication_token.value.empty()) { + + /// TODO! /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle * deduplication in complex INSERT flows. 
* @@ -252,6 +254,8 @@ std::optional generateViewChain( else insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); } @@ -483,6 +487,8 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { + LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + try { auto out = generateViewChain( diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 863c951d957..3e47e3705b9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,6 +47,8 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { + LOG_WARNING(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); + } /** Split the block to blocks, each of them must be written as separate part. 
diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..f0eb56aea13 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -40,6 +40,8 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { + LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", + storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -56,6 +58,10 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk chunk) { + LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", + num_blocks_processed, + chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); + if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -65,6 +71,8 @@ void MergeTreeSink::consume(Chunk chunk) auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); + using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -121,8 +129,16 @@ void MergeTreeSink::consume(Chunk chunk) else max_insert_delayed_streams_for_parallel_write = 0; + LOG_INFO(storage.log, "consume() called for {}.{} " + "streams {} + {} -> {}, " + "max {} support_parallel_write {}", + storage.getStorageID().database_name, storage.getStorageID().getTableName(), + streams, temp_part.streams.size(), streams + temp_part.streams.size(), + max_insert_delayed_streams_for_parallel_write, support_parallel_write); + /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); + if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -156,8 +172,12 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); + for (auto & partition : delayed_chunk->partitions) { + LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); + ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -174,9 +194,15 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); + + LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); + + LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); + auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index d8555d69788..12bc284f68c 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -50,6 +50,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->storeVersionMetadata(); writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. 
@@ -329,6 +331,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { + LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + block.rows(), block.bytes(), data_part_storage->getPartDirectory()); + block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..2bb9aad1e53 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -535,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir]() + auto try_rollback_part_rename = [this, &part, &original_part_dir] () { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql new file mode 100644 index 00000000000..8f718508ee8 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql @@ -0,0 +1,93 @@ +DROP TABLE IF EXISTS table_a_b; +DROP TABLE IF EXISTS table_when_b_even; +DROP TABLE IF EXISTS mv_b_even; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=3; +SET 
min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +CREATE TABLE table_a_b + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup +TO table_when_b_even_wo_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + +CREATE TABLE table_when_b_even_dedup + ( + a String, + b UInt64, + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even_dedup +TO table_when_b_even_dedup +AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT toString(number DIV 2), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; + +SELECT 'table_when_b_even_dedup'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference 
b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql new file mode 100644 index 00000000000..46b9bd52144 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql @@ -0,0 +1,113 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_for_join_with + ( + a_join String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_join, b); + +INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(10); +SELECT 'table_for_join_with'; +SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + + +CREATE TABLE table_a_b + ( + a_src String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a_src, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_when_b_even_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + +CREATE TABLE table_when_b_even_wo_dedup + ( + a_src String CODEC(NONE), + a_join String CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a_src, a_join, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_when_b_even_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_when_b_even_wo_dedup + AS + SELECT a_src, a_join, b + FROM table_a_b + 
FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(number), number +FROM numbers(5) +SETTINGS send_logs_level='trace'; + + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; +SELECT 'count', count() FROM table_when_b_even_dedup; +SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; + +SELECT 'table_when_b_even_wo_dedup'; +SELECT 'count', count() FROM table_when_b_even_wo_dedup; +SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE table_when_b_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE table_when_b_even_wo_dedup; +DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql new file mode 100644 
index 00000000000..02546af69dc --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql @@ -0,0 +1,119 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_source + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_source; + +CREATE TABLE table_dst_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_dst_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_even_dedup + TO table_dst_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + +CREATE TABLE table_dst_wo_dedup + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=0; +SYSTEM STOP MERGES table_dst_wo_dedup; + +CREATE MATERIALIZED VIEW mv_b_even_wo_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 2 = 0; + +CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup + TO table_dst_wo_dedup + AS + SELECT a, b + FROM table_source + WHERE b % 4 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM 
table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_source +SELECT 'source_' || toString(number), number +FROM numbers(8) +SETTINGS send_logs_level='trace'; + +SELECT 'table_source'; +SELECT 'count', count() FROM table_source; +SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; +SELECT 'count', count() FROM table_dst_dedup; +SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; + +SELECT 'table_dst_wo_dedup'; +SELECT 'count', count() FROM table_dst_wo_dedup; +SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; + + +DROP TABLE mv_b_even_dedup; +DROP TABLE mv_b_even_even_dedup; +DROP TABLE mv_b_even_wo_dedup; +DROP TABLE mv_b_even_even_wo_dedup; +DROP TABLE table_dst_dedup; +DROP TABLE table_dst_wo_dedup; +DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql new file mode 100644 index 00000000000..213b449dd73 --- /dev/null +++ b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE table_a_b + ( + a String, + b UInt64 + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_a_b; + +CREATE TABLE table_when_b_even + ( + a String 
CODEC(NONE), + b UInt64 CODEC(NONE) + ) + ENGINE = MergeTree() + ORDER BY (a, b) + SETTINGS non_replicated_deduplication_window=10000; +SYSTEM STOP MERGES table_when_b_even; + +CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + + +SET max_insert_threads=1; +SET update_insert_deduplication_token_in_dependent_materialized_views=1; +SET deduplicate_blocks_in_dependent_materialized_views=1; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + + +SELECT 'first insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; + + +SELECT 'second insert' +SETTINGS send_logs_level='trace'; + +INSERT INTO table_a_b +SELECT 'source_' || toString(1), 1 +FROM numbers(5) +SETTINGS send_logs_level='trace'; + +SELECT 'table_a_b'; +SELECT 'count', count() FROM table_a_b; +SELECT _part, count() FROM table_a_b GROUP BY _part; + +SELECT 'table_when_b_even'; +SELECT 'count', count() FROM table_when_b_even; +SELECT _part, count() FROM table_when_b_even GROUP BY _part; + + +DROP TABLE mv_b_even; +DROP TABLE table_when_b_even; +DROP TABLE table_a_b; From 8b7563040c39dc11647606cdd9f8d77dc6e4cc84 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 19 Mar 2024 19:06:42 +0100 Subject: [PATCH 047/273] non replicated inserts with deduplication user token --- src/Interpreters/InterpreterInsertQuery.cpp | 32 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.h | 3 +- .../MergeTree/MergedBlockOutputStream.cpp | 4 
+- .../0_stateless/03008_deduplication.python | 561 +++++++++++ ...uplication_insert_several_blocks.reference | 870 ++++++++++++++++++ ...008_deduplication_insert_several_blocks.sh | 92 ++ ...tion_mv_generates_several_blocks.reference | 814 ++++++++++++++++ ...duplication_mv_generates_several_blocks.sh | 98 ++ ...cation_several_mv_into_one_table.reference | 590 ++++++++++++ ...deduplication_several_mv_into_one_table.sh | 106 +++ ..._non_replicated_deduplication_mv.reference | 0 .../03008_non_replicated_deduplication_mv.sql | 93 -- ...eduplication_mv_collision_in_dst.reference | 0 ...ated_deduplication_mv_collision_in_dst.sql | 113 --- ...lision_in_dst_from_different_src.reverence | 0 ...mv_collision_in_dst_from_different_src.sql | 119 --- ...eduplication_mv_collision_in_src.reference | 0 ...ated_deduplication_mv_collision_in_src.sql | 76 -- 19 files changed, 3151 insertions(+), 422 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication.python create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql delete mode 100644 
tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference delete mode 100644 tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d1a9ead480e..a5396be9b76 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -519,29 +519,29 @@ BlockIO InterpreterInsertQuery::execute() if (settings.max_insert_threads > 1) { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); + pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. - const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); - pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads - : std::min(settings.max_insert_threads, pipeline.getNumStreams()); - /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - if (!settings.insert_deduplication_token.toString().empty()) - { - /// TODO! 
- LOG_DEBUG( - getLogger("InsertQuery"), - "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); - pre_streams_size = 1; - } +// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread +// if (!settings.insert_deduplication_token.toString().empty()) +// { +// /// TODO! +// LOG_DEBUG( +// getLogger("InsertQuery"), +// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", +// pre_streams_size, pipeline.getNumStreams()); +// pre_streams_size = 1; +// } if (table->supportsParallelInsert()) sink_streams_size = pre_streams_size; } + LOG_DEBUG( + getLogger("InsertQuery"), + "pre_streams_size {}, pipeline.getNumStreams() {}", + pre_streams_size, pipeline.getNumStreams()); + pipeline.resize(pre_streams_size); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 66264a46d9d..70f30faa5b1 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -487,7 +487,7 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_ERROR(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); + LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); try { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 3e47e3705b9..a9a44813545 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,8 +47,7 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_WARNING(log, "MergeTreeDataWriter() 
called from:\n{}", StackTrace().toString()); - + LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 12bc284f68c..fd2b05f615e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -51,7 +51,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "called c-tor"); + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "called c-tor"); } /// If data is pre-sorted. @@ -331,7 +331,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_WARNING(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", + LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", block.rows(), block.bytes(), data_part_storage->getPartDirectory()); block.checkNumberOfRows(); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python new file mode 100644 index 00000000000..3cd29247910 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -0,0 +1,561 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import string + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + + +def __format(template, **params): + field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] + kv_args = {} + for field in 
field_names: + if field in params: + kv_args[field] = params[field] + else: + kv_args[field] = "" + + return template.format(**kv_args) + + +def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): + template = """ + CREATE TABLE {table_name} + {table_columns} + ENGINE = {table_engine} + ORDER BY {table_keys} + {table_settings}; + {table_no_merges} + """ + + params = dict() + params["table_name"] = table_name + params["table_columns"] = table_columns + params["table_keys"] = table_keys + params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" + params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + + deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_value = 1000 if with_deduplication else 0 + + settings = list() + settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + params["table_settings"] = "SETTINGS " + ",".join(settings) + + return __format(template, **params) + + +def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", + ) + + +def get_drop_tables_statements(tables): + return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + + +def get_logs_statement(args): + if args.get_logs: + return "SET send_logs_level='test';" + return "" + +def str2bool(v): + if isinstance(v, 
bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +class ArgsFactory: + def __init__(self, parser): + self.__parser = parser + + def add_opt_engine(self): + self.__parser.add_argument( + "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], default="MergeTree") + + def add_opt_user_token(self): + self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_single_thread(self): + self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_src(self): + self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_dedup_dst(self): + self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + + def add_opt_get_logs(self): + self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + + def add_opt_uniq_blocks(self): + self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + + def add_all(self): + self.add_opt_engine() + self.add_opt_user_token() + self.add_opt_single_thread() + self.add_opt_dedup_src() + self.add_opt_dedup_dst() + self.add_opt_get_logs() + self.add_opt_uniq_blocks() + + +def test_insert_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_statement = instance_create_statement( + table_name="table_when_b_even", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + 
table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + create_mv_statement = """ + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + """ + + drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + + insert_statement = instance_insert_statement( + "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + ) + + print_details_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} + + SELECT 'table_when_b_even'; + SELECT 'count', count() FROM table_when_b_even; + {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} + """ + + + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 5 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = """ + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 10 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + 
assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_a_b_statement} + + {create_table_when_b_even_statement} + + {create_mv_statement} + + -- first insert + {insert_statement} + + {print_details_statements} + + {assert_first_insert_statements} + + -- second insert, it is retry + {insert_statement} + + {print_details_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_mv_generates_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + details_print_for_table_for_join_with = "" + if args.get_logs: + details_print_for_table_for_join_with = """ + SELECT 'table_for_join_with'; + SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + """ + + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a_src String, b UInt64)", + table_keys="(a_src, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_and_joined_statement = instance_create_statement( + table_name="table_when_b_even_and_joined", + table_columns="(a_src String, a_join String, b UInt64)", + table_keys="(a_src, a_join, b)", + 
table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + + SELECT 'table_when_b_even_and_joined'; + SELECT 'count', count() FROM table_when_b_even_and_joined; + {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 5 ) + FROM table_a_b; + + SELECT throwIf( count() != 47 ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + FROM table_when_b_even_and_joined; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + FROM table_when_b_even_and_joined; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + FROM 
table_when_b_even_and_joined; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + CREATE TABLE table_for_join_with + (a_join String, b UInt64) + ENGINE = MergeTree() + ORDER BY (a_join, b); + INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(9); + {details_print_for_table_for_join_with} + + {create_table_a_b_statement} + SYSTEM STOP MERGES table_a_b; + + {create_table_when_b_even_and_joined_statement} + SYSTEM STOP MERGES table_when_b_even_and_joined; + + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even_and_joined + AS + SELECT a_src, a_join, table_for_join_with.b as b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + -- first insert + {insert_statement} + + {details_print_statements} + + -- first assertion + {assert_first_insert_statements} + + -- second insert + {insert_statement} + + {details_print_statements} + + -- second assertion + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_several_mv_into_one_table(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + create_table_src_statement = instance_create_statement( + table_name="table_src", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_dst_statement = instance_create_statement( + 
table_name="table_dst", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_src", 8, args.insert_unique_blocks, args.use_insert_token + ) + + details_print_statements = f""" + SELECT 'table_src count', count() FROM table_src; + + SELECT 'table_dst count', count() FROM table_dst; + {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 8 ) + FROM table_src; + + SELECT throwIf( count() != 6 ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) + FROM table_dst; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + + script = f""" + {get_logs_statement(args)} + + SET 
max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_src_statement} + + {create_table_dst_statement} + + CREATE MATERIALIZED VIEW mv_b_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 2 = 0; + + CREATE MATERIALIZED VIEW mv_b_even_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 4 = 0; + + -- first insert + {insert_statement} + + {details_print_statements} + + {assert_first_insert_statements} + + -- second insert, retry + {insert_statement} + + {details_print_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="test") + test_insert_several_blocks( + subparsers.add_parser("insert_several_blocks_into_table") + ) + test_mv_generates_several_blocks( + subparsers.add_parser("mv_generates_several_blocks") + ) + test_several_mv_into_one_table( + subparsers.add_parser("several_mv_into_one_table") + ) + args = parser.parse_args() + if args.test is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + if args.test is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference new file mode 100644 index 00000000000..35b2642a4d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -0,0 +1,870 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 
+table_when_b_even +count 20 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b 
+count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 
10 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even +count 5 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +EXPECTED_TO_FAIL + +Test case 
46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: 
engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: 
engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh new file mode 100755 index 00000000000..5b07f6033ad --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# fails, it is an error. Several blocks in src table with the same user token are processed in parallel and deduplicated + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# fails, it is an error. The same situation as the first one, but on dst table. 
+ +RUN_ONLY="" +#RUN_ONLY="" + +KNOWN_ERRORS=(8 9 10 11 12 13) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + 
done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference new file mode 100644 index 00000000000..eccdbd52f37 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -0,0 +1,814 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +EXPECTED_TO_FAIL + +Test case 41: 
engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 1 +table_when_b_even_and_joined +count 47 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True 
+table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh new file mode 100755 index 00000000000..1dd648583c6 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# fail due to a race in multi-threaded insertion: blocks are deduplicated in different threads + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False +# the same as the first group, but for the dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# the dst table deduplicates all incoming blocks from one insert because the hash is not unique + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) + +# Returns 0 (success) if case number $1 is in KNOWN_ERRORS or equals a listed number plus 32. +# The loops below yield 2^5 = 32 cases per engine, so e+32 is the same combination under ReplicatedMergeTree. +function is_known_error() +{ + local n="$1" e + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + 
continue + fi + echo "$THIS_RUN" + + # Known failures: hide stderr; print FIXED if the case unexpectedly succeeds, EXPECTED_TO_FAIL otherwise. + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 "$CURDIR"/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 "$CURDIR"/03008_deduplication.python mv_generates_several_blocks \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference new file mode 100644 index 00000000000..12eea604e3a --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -0,0 +1,590 @@ + +Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 
+EXPECTED_TO_FAIL + +Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +EXPECTED_TO_FAIL + +Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 1 +table_dst count 6 +EXPECTED_TO_FAIL + +Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +EXPECTED_TO_FAIL + +Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +EXPECTED_TO_FAIL + +Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh new file mode 100755 index 00000000000..487b3ac5f88 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +# race condition on insert into src table + +# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# race condition on insert into dst table + +# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +# dst deduplicates blocks from one inserts from different materialized view + +# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 21: engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +# dst deduplicates blocks from different inserts by hash + +KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) + +function is_known_error() +{ + n=$1 + for e in "${KNOWN_ERRORS[@]}"; do + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + return 0 + fi + done + return 1 +} + +RUN_ONLY="" +#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ 
+ --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done + done + done + done + done +done + +echo +echo "All cases executed" + + diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql deleted file mode 100644 index 8f718508ee8..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv.sql +++ /dev/null @@ -1,93 +0,0 @@ -DROP TABLE IF EXISTS table_a_b; -DROP TABLE IF EXISTS table_when_b_even; -DROP TABLE IF EXISTS mv_b_even; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=3; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -CREATE TABLE table_a_b - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW 
mv_b_even_wo_dedup -TO table_when_b_even_wo_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - -CREATE TABLE table_when_b_even_dedup - ( - a String, - b UInt64, - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even_dedup -TO table_when_b_even_dedup -AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT toString(number DIV 2), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part; - -SELECT 'table_when_b_even_dedup'; -SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql deleted file mode 100644 index 46b9bd52144..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst.sql +++ /dev/null @@ -1,113 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE 
table_for_join_with - ( - a_join String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_join, b); - -INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(10); -SELECT 'table_for_join_with'; -SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - - -CREATE TABLE table_a_b - ( - a_src String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a_src, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_when_b_even_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - -CREATE TABLE table_when_b_even_wo_dedup - ( - a_src String CODEC(NONE), - a_join String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a_src, a_join, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_when_b_even_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_when_b_even_wo_dedup - AS - SELECT a_src, a_join, b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS 
send_logs_level='trace'; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(number), number -FROM numbers(5) -SETTINGS send_logs_level='trace'; - - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even_dedup, here the result if join is deduplicated inside one request, it is not correct'; -SELECT 'count', count() FROM table_when_b_even_dedup; -SELECT _part, count() FROM table_when_b_even_dedup GROUP BY _part; - -SELECT 'table_when_b_even_wo_dedup'; -SELECT 'count', count() FROM table_when_b_even_wo_dedup; -SELECT _part, count() FROM table_when_b_even_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE table_when_b_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE table_when_b_even_wo_dedup; -DROP TABLE table_a_b; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.reverence deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql deleted file mode 100644 index 02546af69dc..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_dst_from_different_src.sql +++ /dev/null @@ -1,119 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_source - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_source; - -CREATE TABLE table_dst_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS 
non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_dst_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_even_dedup - TO table_dst_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - -CREATE TABLE table_dst_wo_dedup - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=0; -SYSTEM STOP MERGES table_dst_wo_dedup; - -CREATE MATERIALIZED VIEW mv_b_even_wo_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 2 = 0; - -CREATE MATERIALIZED VIEW mv_b_even_wo_even_dedup - TO table_dst_wo_dedup - AS - SELECT a, b - FROM table_source - WHERE b % 4 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_dedup'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_source -SELECT 'source_' || toString(number), number -FROM numbers(8) -SETTINGS send_logs_level='trace'; - -SELECT 'table_source'; -SELECT 'count', count() FROM table_source; -SELECT _part, count() FROM table_source GROUP 
BY _part ORDER BY _part; - -SELECT 'table_dst_dedup, block from different mv is deduplicated, it is wrong'; -SELECT 'count', count() FROM table_dst_dedup; -SELECT _part, count() FROM table_dst_dedup GROUP BY _part ORDER BY _part; - -SELECT 'table_dst_wo_dedup'; -SELECT 'count', count() FROM table_dst_wo_dedup; -SELECT _part, count() FROM table_dst_wo_dedup GROUP BY _part ORDER BY _part; - - -DROP TABLE mv_b_even_dedup; -DROP TABLE mv_b_even_even_dedup; -DROP TABLE mv_b_even_wo_dedup; -DROP TABLE mv_b_even_even_wo_dedup; -DROP TABLE table_dst_dedup; -DROP TABLE table_dst_wo_dedup; -DROP TABLE table_source; diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql b/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql deleted file mode 100644 index 213b449dd73..00000000000 --- a/tests/queries/0_stateless/03008_non_replicated_deduplication_mv_collision_in_src.sql +++ /dev/null @@ -1,76 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE table_a_b - ( - a String, - b UInt64 - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_a_b; - -CREATE TABLE table_when_b_even - ( - a String CODEC(NONE), - b UInt64 CODEC(NONE) - ) - ENGINE = MergeTree() - ORDER BY (a, b) - SETTINGS non_replicated_deduplication_window=10000; -SYSTEM STOP MERGES table_when_b_even; - -CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - - -SET max_insert_threads=1; -SET update_insert_deduplication_token_in_dependent_materialized_views=1; -SET deduplicate_blocks_in_dependent_materialized_views=1; - -SET max_block_size=1; -SET 
min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - - -SELECT 'first insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b, it deduplicates rows within one insert, it is wrong'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part; - - -SELECT 'second insert' -SETTINGS send_logs_level='trace'; - -INSERT INTO table_a_b -SELECT 'source_' || toString(1), 1 -FROM numbers(5) -SETTINGS send_logs_level='trace'; - -SELECT 'table_a_b'; -SELECT 'count', count() FROM table_a_b; -SELECT _part, count() FROM table_a_b GROUP BY _part; - -SELECT 'table_when_b_even'; -SELECT 'count', count() FROM table_when_b_even; -SELECT _part, count() FROM table_when_b_even GROUP BY _part; - - -DROP TABLE mv_b_even; -DROP TABLE table_when_b_even; -DROP TABLE table_a_b; From 02c9a07778cdc6295d2ebf972c52de389e7edabb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 12 Apr 2024 15:04:52 +0200 Subject: [PATCH 048/273] work in progress --- src/Common/CollectionOfDerived.h | 153 +++ src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 11 +- src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 615 +++++----- src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/SystemLog.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 62 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 17 +- 
.../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 8 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 6 +- src/Processors/Sinks/SinkToStorage.h | 37 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 2 +- .../AggregatingInOrderTransform.cpp | 11 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 4 +- src/Processors/Transforms/FilterTransform.cpp | 3 +- .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/NumberBlocksTransform.cpp | 1 + .../Transforms/NumberBlocksTransform.h | 224 ++++ .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingChunksTransform.cpp | 10 + .../Transforms/SquashingChunksTransform.h | 2 + .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 91 +- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 + src/Storages/Distributed/DistributedSink.cpp | 8 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- src/Storages/Kafka/StorageKafka.cpp | 2 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/MaterializedView/RefreshTask.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 19 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + 
.../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 50 +- src/Storages/MergeTree/MergeTreeSink.h | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 54 +- .../MergeTree/ReplicatedMergeTreeSink.h | 3 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 2 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- .../PostgreSQLReplicationHandler.cpp | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- src/Storages/StorageAzureBlob.cpp | 4 +- src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageS3.cpp | 4 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 4 +- .../0_stateless/03008_deduplication.python | 140 ++- ...uplication_insert_several_blocks.reference | 1088 ++++++++++++++++- ...008_deduplication_insert_several_blocks.sh | 101 +- ...tion_mv_generates_several_blocks.reference | 1032 +++++++++++++++- ...duplication_mv_generates_several_blocks.sh | 99 +- ...cation_several_mv_into_one_table.reference | 784 +++++++++++- ...deduplication_several_mv_into_one_table.sh | 101 +- 100 files 
changed, 4107 insertions(+), 1004 deletions(-) create mode 100644 src/Common/CollectionOfDerived.h create mode 100644 src/Processors/Transforms/NumberBlocksTransform.cpp create mode 100644 src/Processors/Transforms/NumberBlocksTransform.h diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h new file mode 100644 index 00000000000..8579c4dd50c --- /dev/null +++ b/src/Common/CollectionOfDerived.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +template +class CollectionOfDerivedItems +{ +public: + using Self = CollectionOfDerivedItems; + using ItemPtr = std::shared_ptr; + +private: + struct Rec + { + std::type_index type_idx; + ItemPtr ptr; + + bool operator<(const Rec & other) const + { + return type_idx < other.type_idx; + } + + bool operator<(const std::type_index & value) const + { + return type_idx < value; + } + + bool operator==(const Rec & other) const + { + return type_idx == other.type_idx; + } + }; + using Records = std::vector; + +public: + void swap(Self & other) + { + records.swap(other.records); + } + + void clear() + { + records.clear(); + } + + bool empty() const + { + return records.empty(); + } + + size_t size() const + { + return records.size(); + } + + Self clone() const + { + Self result; + result.records.reserve(records.size()); + for (const auto & rec: records) + result.records.emplace_back(rec.type_idx, rec.ptr->clone()); + return result; + } + + void append(Self && other) + { + std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); + std::sort(records.begin(), records.end()); + chassert(isUniqTypes()); + } + + template + void add(std::shared_ptr info) + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + return addImpl(std::type_index(typeid(T)), std::move(info)); + } + + template + std::shared_ptr get() const + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base 
class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + return cast; + } + + template + std::shared_ptr extract() + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + + records.erase(it); + return cast; + } + +private: + bool isUniqTypes() const + { + auto uniq_it = std::adjacent_find(records.begin(), records.end()); + + return uniq_it == records.end(); + } + + void addImpl(std::type_index type_idx, ItemPtr item) + { + auto it = std::lower_bound(records.begin(), records.end(), type_idx); + + if (it == records.end()) + { + records.emplace_back(type_idx, item); + return; + } + + chassert(it->type_idx != type_idx); + + records.emplace(it, type_idx, item); + + chassert(isUniqTypes()); + } + + Records::const_iterator getImpl(std::type_index type_idx) const + { + auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); + + if (it == records.cend()) + return records.cend(); + + if (it->type_idx != type_idx) + return records.cend(); + + return it; + } + + Records records; +}; + +} diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ab29c64184d..65035790729 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, 
false, false, false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -780,7 +780,7 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -999,7 +999,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } @@ -1051,7 +1051,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..e070d8694a7 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -11,6 +12,7 @@ #include #include #include +#include "Processors/Chunk.h" #include #include @@ -91,7 +93,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfo +class TableCheckTask : public ChunkInfoCloneable { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -110,6 +112,12 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } + TableCheckTask(const TableCheckTask & other) + : table(other.table) + , check_data_tasks(other.check_data_tasks) + , 
is_finished(other.is_finished.load()) + {} + std::optional checkNext() const { if (isFinished()) @@ -121,8 +129,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; - auto result = table->checkDataNext(check_data_tasks_); + IStorage::DataValidationTasksPtr tmp = check_data_tasks; + auto result = table->checkDataNext(tmp); is_finished = !result.has_value(); return result; } @@ -180,7 +188,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.setChunkInfo(std::move(current_check_task)); + result.getChunkInfos().add(std::move(current_check_task)); return result; } @@ -280,7 +288,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); + auto table_check_task = chunk.getChunkInfos().get(); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..a143ca867e1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1690,8 +1690,15 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery( + insert, + getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns, + false, + false, + false + ) + .execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..08d6ac7df9e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ 
-524,7 +524,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a5396be9b76..40d5a84031d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -394,28 +396,323 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +{ + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + + std::vector sink_chains; + std::vector presink_chains; + + for (size_t i = 0; i < sink_streams; ++i) + { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + + sink_chains.emplace_back(std::move(out)); + } + + for (size_t i = 0; i < presink_streams; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } + + return 
{std::move(presink_chains), std::move(sink_chains)}; +} + + +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + ContextPtr select_context = getContext(); + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. 
+ */ + + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); + + Settings new_settings = select_context->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); + new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + } + + auto context_for_trivial_select = Context::createCopy(context); + context_for_trivial_select->setSettings(new_settings); + context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + select_context = context_for_trivial_select; + } + + QueryPipelineBuilder pipeline; + + { + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
+ if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + { + query_sample_block.setColumn( + col_idx, + ColumnWithTypeAndName( + makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), + makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), + query_columns[col_idx].name)); + } + } + } + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. 
+ pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); + size_t sink_streams_size = table->supportsParallelInsert() ? 
presink_streams_size : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.resize(1); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + return std::make_shared(in_header); + }); + } + + pipeline.resize(presink_chains.size()); + for (auto & chain : presink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(presink_chains)); + + pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + size_t num_select_threads = pipeline.getNumThreads(); + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); +} + + +QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +{ + const Settings & settings = getContext()->getSettingsRef(); + auto & query = query_ptr->as(); + + StoragePtr table = getTable(query); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + Chain chain; + + { + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + 1, 1, + table, metadata_snapshot, query_sample_block); + + chain = std::move(presink_chains.front()); + chain.appendChain(std::move(sink_chains.front())); + } + + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); + pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + pipeline.complete(std::move(pipe)); + } + + return pipeline; +} + + BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); - QueryPipelineBuilder pipeline; - std::optional distributed_pipeline; - QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - StoragePtr inner_table; - if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), 
no_destination, allow_materialized); /// For table functions we check access while executing @@ -423,303 +720,37 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) - // Distributed INSERT SELECT - distributed_pipeline = table->distributedWrite(query, getContext()); - - std::vector presink_chains; - std::vector sink_chains; - if (!distributed_pipeline) + if (!allow_materialized) { - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - /// * If it's not an INSERT SELECT, forget all that and use one stream. - size_t pre_streams_size = 1; - size_t sink_streams_size = 1; - - if (query.select) - { - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... 
FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - - Settings new_settings = getContext()->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); - } - - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - else - { - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
- auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - if (settings.max_insert_threads > 1) - { - pre_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - -// /// Deduplication when passing insert_deduplication_token breaks if using more than one thread -// if (!settings.insert_deduplication_token.toString().empty()) -// { -// /// TODO! -// LOG_DEBUG( -// getLogger("InsertQuery"), -// "Insert-select query using insert_deduplication_token, setting streams from {} to 1 to avoid deduplication issues, pipeline.getNumStreams() {}", -// pre_streams_size, pipeline.getNumStreams()); -// pre_streams_size = 1; -// } - - if (table->supportsParallelInsert()) - sink_streams_size = pre_streams_size; - } - - LOG_DEBUG( - getLogger("InsertQuery"), - "pre_streams_size {}, pipeline.getNumStreams() {}", - pre_streams_size, pipeline.getNumStreams()); - - pipeline.resize(pre_streams_size); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
- if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) - query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); - } - } - } - } - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - for (size_t i = 0; i < sink_streams_size; ++i) - { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams_size); - - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - for (size_t i = 0; i < pre_streams_size; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } + for (const auto & column : 
metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } BlockIO res; - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? - if (distributed_pipeline) + if (query.select) { - res.pipeline = std::move(*distributed_pipeline); - } - else if (query.select) - { - const auto & header = presink_chains.at(0).getInputHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.parallel_distributed_insert_select) { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + res.pipeline = *table->distributedWrite(query, getContext()); } - - size_t num_select_threads = pipeline.getNumThreads(); - - for (auto & chain : presink_chains) - resources = chain.detachResources(); - for (auto & chain : sink_chains) - resources = chain.detachResources(); - - pipeline.addChains(std::move(presink_chains)); - pipeline.resize(sink_chains.size()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) + else { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + res.pipeline = buildInsertSelectPipeline(); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
- pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); - } - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - auto & chain = presink_chains.at(0); - chain.appendChain(std::move(sink_chains.at(0))); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - res.pipeline = QueryPipeline(std::move(presink_chains[0])); - res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), 
nullptr); - res.pipeline.complete(std::move(pipe)); - } + res.pipeline = buildInsertPipeline(); } - res.pipeline.addResources(std::move(resources)); - res.pipeline.addStorageHolder(table); + + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (inner_table) res.pipeline.addStorageHolder(inner_table); @@ -742,17 +773,21 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } + void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } + void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized); + return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } + + } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bf73fb2a319..3f3b7a6f106 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_ = false, - bool no_squash_ = false, - bool no_destination_ = false, - bool async_insert_ = false); + bool allow_materialized_, + bool no_squash_, + bool no_destination, + bool async_insert_); /** Prepare a request for execution. 
Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,12 +73,17 @@ private: ASTPtr query_ptr; const bool allow_materialized; - const bool no_squash; - const bool no_destination; + bool no_squash = false; + bool no_destination = false; const bool async_insert; std::vector> owned_buffers; + std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + + QueryPipeline buildInsertSelectPipeline(); + QueryPipeline buildInsertPipeline(); + Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3af8761ff8e..2d5109a612c 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -522,7 +522,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, /// We always want to deliver the data to the original table regardless of the MVs insert_context->setSetting("materialized_views_ignore_errors", true); - InterpreterInsertQuery interpreter(query_ptr, insert_context); + InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..13df2e64421 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -43,17 +35,11 @@ 
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + auto tmp = Chunk(getColumns(), getNumRows()); + tmp.setChunkInfos(chunk_infos.clone()); + return tmp; } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..b4345d18a08 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,7 +1,15 @@ #pragma once +#include "base/defines.h" + +#include #include + +#include +#include +#include #include +#include namespace DB { @@ -9,11 +17,29 @@ namespace DB class ChunkInfo { public: - virtual ~ChunkInfo() = default; + using Ptr = std::shared_ptr; + ChunkInfo() = default; + ChunkInfo(const ChunkInfo&) = default; + ChunkInfo(ChunkInfo&&) = default; + + virtual Ptr clone() const = 0; + virtual ~ChunkInfo() = default; }; -using ChunkInfoPtr = std::shared_ptr; + +template +class ChunkInfoCloneable : public ChunkInfo +{ +public: + ChunkInfoCloneable() = default; + ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; + + Ptr clone() const override + { + return std::static_pointer_cast(std::make_shared(*static_cast(this))); + } +}; /** * Chunk is a list of columns with the same length. 
@@ -32,26 +58,26 @@ using ChunkInfoPtr = std::shared_ptr; class Chunk { public: + using ChunkInfoCollection = CollectionOfDerivedItems; + Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_info(std::move(other.chunk_info)) + , chunk_infos(std::move(other.chunk_infos)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_info = std::move(other.chunk_info); + chunk_infos = std::move(other.chunk_infos); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -62,15 +88,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); - chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_info.reset(); + chunk_infos.clear(); } const Columns & getColumns() const { return columns; } @@ -81,9 +107,9 @@ public: /** Get empty columns with the same types as in block. 
*/ MutableColumns cloneEmptyColumns() const; - const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } - bool hasChunkInfo() const { return chunk_info != nullptr; } - void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } + ChunkInfoCollection & getChunkInfos() { return chunk_infos; } + const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } + void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -107,7 +133,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoPtr chunk_info; + ChunkInfoCollection chunk_infos; void checkNumRowsIsConsistent(); }; @@ -117,11 +143,15 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfo +class AsyncInsertInfo : public ChunkInfoCloneable { public: AsyncInsertInfo() = default; - explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + AsyncInsertInfo(const AsyncInsertInfo & other) = default; + AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) + : offsets(offsets_) + , tokens(tokens_) + {} std::vector offsets; std::vector tokens; @@ -130,9 +160,11 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfo +class ChunkMissingValues : public ChunkInfoCloneable { public: + ChunkMissingValues(const ChunkMissingValues & other) = default; + using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d27002197d2..d9fab88fe1f 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..25c15d40c9a 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + 
block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9c85dab70c4..6067e2f3db3 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -180,7 +180,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + piece.emplace_back(std::move(columns), count); + piece.back().setChunkInfos(concatenated.getChunkInfos()); + writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 4136fc5a5f2..46be6e74693 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,8 +8,9 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}) + , input(inputs.front()) + , output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..f33cc267c44 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,16 +51,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & info = input.chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in 
FinishAggregatingInOrderAlgorithm"); + const auto & arenas_info = input.chunk.getChunkInfos().get(); + if (!arenas_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); - Int64 allocated_bytes = 0; - /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. - if (const auto * arenas_info = typeid_cast(info.get())) - allocated_bytes = arenas_info->allocated_bytes; - - states[source_num] = State{input.chunk, description, allocated_bytes}; + states[source_num] = State{input.chunk, description, arenas_info->allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() @@ -134,7 +129,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -161,7 +156,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().setChunkInfo(std::make_shared()); + chunks.back().getChunkInfos().add(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. 
diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index bcf4e759024..e4f22deec8d 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,18 +6,22 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfo +class MergeTreePartLevelInfo : public ChunkInfoCloneable { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } + explicit MergeTreePartLevelInfo(ssize_t part_level) + : origin_merge_tree_part_level(part_level) + { } + MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; + size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (const auto * part_level_info = typeid_cast(info.get())) + const auto part_level_info = chunk.getChunkInfos().get(); + if (part_level_info) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 9e5c1249c4e..d0b0291511d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h 
b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..770510232cc 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,8 +1,10 @@ #pragma once +#include #include #include #include #include +#include "Processors/Chunk.h" namespace Poco { @@ -14,11 +16,13 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfo +struct ChunkSelectFinalIndices : public ChunkInfoCloneable { + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); + ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; + const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index fbb47969b2f..b1b0182a113 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. 
- if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) + if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index c218f622870..be629271736 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) + if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index 30cf958c072..c05cc1defcb 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 5f9f9f9b1a1..146bd4505a4 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,10 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk.clone()); - if (!lastBlockIsDuplicate()) + setDeduplicationTokenForChildren(chunk); + fillDeduplicationTokenForChildren(chunk); + consume(chunk); + if (!lastBlockIsDuplicate()) // 
TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 023bbd8b094..07a944b0943 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,6 +1,10 @@ #pragma once +#include #include +#include #include +#include +#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -18,9 +22,38 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk chunk) = 0; + virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return false; } + virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + { + auto token_info = chunk.getChunkInfos().get(); + if (token_info) + return token_info; + + auto block_dedup_token_for_children = std::make_shared(""); + chunk.getChunkInfos().add(block_dedup_token_for_children); + return block_dedup_token_for_children; + } + + virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const + { + return chunk.getChunkInfos().get(); + } + + virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const + { + SipHash hash; + for (const auto & colunm: chunk.getColumns()) + { + colunm->updateHashFast(hash); + } + const auto hash_value = hash.get128(); + + chunk.getChunkInfos().get()->addTokenPart( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + } + private: std::vector table_locks; @@ -38,7 +71,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk) override {} + void consume(Chunk &) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index ec0dc9609f1..7ac460c14e2 100644 --- 
a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,7 +43,10 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - return Chunk(res.getColumns(), res.rows(), std::move(info)); + auto chunk = Chunk(res.getColumns(), res.rows()); + chunk.getChunkInfos().add(std::move(info)); + + return chunk; } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..1578bd389c9 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 00f40a34361..fb888c104c4 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -20,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9ffe15d0f85..45b0960ec8f 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. 
- to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); + to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - finalizeChunk(chunk, aggregates_mask); - else if (!chunk.getChunkInfo()) { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); + finalizeChunk(chunk, aggregates_mask); + } + else if (!chunk.getChunkInfos().get()) + { + chunk.getChunkInfos().add(std::make_shared()); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 5d50e97f552..6433f862dfd 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -12,10 +13,12 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfo +struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable { + ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} + Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index b48d435720a..d6595ef9e9a 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -44,15 +44,11 @@ namespace 
{ const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info; + return agg_info.get(); } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -210,11 +206,7 @@ private: void process(Chunk && chunk) { - if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); - - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index e167acde067..430a9a6e50a 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace CurrentMetrics { @@ -19,7 +21,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfo +class AggregatedChunkInfo : public ChunkInfoCloneable { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 0793bb3db5b..36aea045b18 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -38,10 +38,9 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_ static 
std::shared_ptr getSelectByFinalIndices(Chunk & chunk) { - if (auto select_final_indices_info = std::dynamic_pointer_cast(chunk.getChunkInfo())) + if (auto select_final_indices_info = chunk.getChunkInfos().extract()) { const auto & index_column = select_final_indices_info->select_final_indices; - chunk.setChunkInfo(nullptr); if (index_column && index_column->size() != chunk.getNumRows()) return select_final_indices_info; } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..ca204bcb482 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,10 +365,9 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - if (!data.chunk.hasChunkInfo()) + task = data.chunk.getChunkInfos().get(); + if (!task) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); - - task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.setChunkInfo(std::make_shared()); + chunk.getChunkInfos().add(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.setChunkInfo(task); + chunk.getChunkInfos().add(std::move(task)); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index a308af03662..5fdea2524e2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,5 +1,7 @@ #pragma once +#include #include +#include "Processors/Chunk.h" namespace DB @@ -111,11 +113,12 @@ 
private: }; -class DelayedBlocksTask : public ChunkInfo +class DelayedBlocksTask : public ChunkInfoCloneable { public: DelayedBlocksTask() = default; + DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index 607087fb39c..d7bc320173b 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,11 +150,7 @@ private: if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index fc40c6894bb..ea9ebb0f96e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_unique(std::move(chunks)); + info->chunks = std::make_shared(std::move(chunks)); Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); 
output.push(std::move(chunk)); } @@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + + auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); + chunk.getChunkInfos().add(std::move(chunk_info)); + + chunks_map[bucket].emplace_back(std::move(chunk)); } } } @@ -319,9 +322,7 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); - + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) + if 
(cur_chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (const auto * agg_info = typeid_cast(cur_info.get())) + if (auto agg_info = cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (typeid_cast(cur_info.get())) + else if (cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.setChunkInfo(std::move(res_info)); + chunk.getChunkInfos().add(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 77ee3034ffc..958b43b11ed 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ 
b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,6 +2,7 @@ #include #include +#include "Processors/Chunk.h" #include #include #include @@ -142,9 +143,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfo +struct ChunksToMerge : public ChunkInfoCloneable { - std::unique_ptr chunks; + std::shared_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index ad723da7527..446e60a0b81 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. 
@@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + + chunk.getChunkInfos().add(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp new file mode 100644 index 00000000000..61ff3f6bfd5 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -0,0 +1 @@ +#include diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h new file mode 100644 index 00000000000..ca990a925c1 --- /dev/null +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -0,0 +1,224 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ + struct SerialBlockNumberInfo : public ChunkInfoCloneable + { + SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; + explicit SerialBlockNumberInfo(size_t block_number_) + : block_number(block_number_) + { + } + + size_t block_number = 0; + }; + + + class NumberBlocksTransform : public ISimpleTransform + { + public: + explicit NumberBlocksTransform(const Block & header) + : ISimpleTransform(header, header, true) + { + } + + String getName() const override { return "NumberBlocksTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(block_number++)); + } + + private: + size_t block_number = 0; + }; + + + class DedupTokenInfo : public ChunkInfoCloneable + { + public: + 
DedupTokenInfo(const DedupTokenInfo & other) = default; + explicit DedupTokenInfo(String first_part) + { + addTokenPart(std::move(first_part)); + } + + String getToken() const + { + String result; + result.reserve(getTotalSize()); + + for (const auto & part : token_parts) + { + result.append(part); + } + + return result; + } + + void addTokenPart(String part) + { + token_parts.push_back(std::move(part)); + } + + private: + size_t getTotalSize() const + { + size_t size = 0; + for (const auto & part : token_parts) + size += part.size(); + return size; + } + + std::vector token_parts; + }; + + class AddUserDeduplicationTokenTransform : public ISimpleTransform + { + public: + AddUserDeduplicationTokenTransform(String token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , token(token_) + { + } + + String getName() const override { return "AddUserDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared(token)); + } + + private: + String token; + }; + + + class CheckInsertDeduplicationTokenTransform : public ISimpleTransform + { + public: + CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return "CheckInsertDeduplicationTokenTransform"; } + + void transform(Chunk & chunk) override + { + if (!must_be_present) + return; + + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), + "{}, token: {}", + debug, token_info->getToken()); + } + + private: + String debug; + bool must_be_present = false; + }; + + + class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform + { + public: + 
explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); + + auto block_number_info = chunk.getChunkInfos().get(); + if (!block_number_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); + + token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); + } + }; + + class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform + { + public: + explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); + + auto x = block_number++; + token_info->addTokenPart(fmt::format(":block-{}", x)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), + "updated with {}, result: {}", + fmt::format(":block-{}", x), token_info->getToken()); + } + private: + size_t block_number = 0; + }; + + class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform + { + public: + ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & 
header_) + : ISimpleTransform(header_, header_, true) + , token_part(token_part_) + { + } + + String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } + + void transform(Chunk & chunk) override + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); + + token_info->addTokenPart(fmt::format("{}", token_part)); + + LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), + "updated with {}, result: {}", + token_part, token_info->getToken()); + } + + private: + String token_part; + }; + +} diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index 480ab1a0f61..b44f5a3203e 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); + auto select_final_indices_info = chunk.getChunkInfos().extract(); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,7 +41,6 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } - chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..4d693e5e809 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,14 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + if (cur_chunkinfos.empty()) + cur_chunkinfos 
= chunk.getChunkInfos(); + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) { cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } @@ -35,6 +40,8 @@ void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); finish_chunk.setColumns(block.getColumns(), block.rows()); + finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } void SquashingChunksTransform::work() @@ -65,7 +72,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (!finished) { if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + { chunk.setColumns(block.getColumns(), block.rows()); + chunk.setChunkInfos(chunk.getChunkInfos()); + } } else { diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..6de96d4100d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include "Processors/Chunk.h" namespace DB { @@ -25,6 +26,7 @@ protected: private: SquashingTransform squashing; Chunk cur_chunk; + Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..59fceccb538 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,11 +150,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. 
if (overflow_row) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 70f30faa5b1..056f8d07627 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,9 +24,12 @@ #include #include #include +#include "Processors/Chunk.h" +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include +#include namespace ProfileEvents @@ -120,6 +124,7 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; + Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -137,7 +142,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageLiveView & live_view; @@ -151,7 +156,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageWindowView & window_view; @@ -215,50 +220,6 @@ 
std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); - // Do not deduplicate insertions into MV if the main insertion is Ok - if (disable_deduplication_for_children) - { - insert_context->setSetting("insert_deduplicate", Field{false}); - } - else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - - /// TODO! - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. 
- * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - LOG_DEBUG(getLogger("PushingToViews"), "insert_deduplication_token {}", insert_deduplication_token); - - insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } - // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -364,12 +325,22 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); + out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + String addition_part = view_id.hasUUID() ? 
toString(view_id.uuid) : view_id.getFullNameNotQuoted(); + out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); + } + + out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -381,6 +352,8 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -422,11 +395,20 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + + if (!disable_deduplication_for_children) + { + out.addSource(std::make_shared(out.getInputHeader())); + } + auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); + + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -641,6 +623,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -743,6 +728,7 @@ 
void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); state.emplace(process(block, view, *views_data)); + state->chunk_infos = chunk.getChunkInfos(); } @@ -760,6 +746,9 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } + // here are we copy chunk_infos to the all chunks generated from the one consumed chunk + res.chunk.getChunkInfos().append(state->chunk_infos.clone()); + if (res.is_done) state.reset(); @@ -774,10 +763,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk chunk) +void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -797,11 +786,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk chunk) +void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index f0b2ead687e..a9e5b1535c0 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } 
+ void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index 2cd4dc42a83..bb2be2c8ffb 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo return *this; } +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +{ + append(std::move(rhs)); + return *this; +} + QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index ed9eb68b7ba..e40fa04f72c 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,9 +19,12 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move 
assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..2e3096683d0 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk chunk) +void DistributedSink::consume(Chunk & chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,7 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized); + InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git 
a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index a4c95633595..5b7396f2c6f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index abd4b4ce23b..6ca4ec6e079 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true); + InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0f3b03f0955..1ca7c1f71d0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -803,12 +803,12 @@ public: String getName() const override { return "HDFSSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 03a30d47d91..7b19dacb4c9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1098,7 +1098,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only 
insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 792133ced64..9803fa0a160 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..57d75b969c3 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); + return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), + .chunk = std::move(chunk), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index fbb48b37482..8841f490e38 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -275,7 +275,10 @@ try ++it; } - return Chunk(std::move(res_columns), rows_read, add_part_level ? 
std::make_shared(data_part->info.level) : nullptr); + auto result = Chunk(std::move(res_columns), rows_read); + if (add_part_level) + result.getChunkInfos().add(std::make_shared(data_part->info.level)); + return result; } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index f0eb56aea13..2e455cd2bd5 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,8 +1,11 @@ #include #include +#include #include #include +#include "Common/Exception.h" #include +#include "Interpreters/StorageID.h" namespace ProfileEvents { @@ -56,7 +59,7 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } -void MergeTreeSink::consume(Chunk chunk) +void MergeTreeSink::consume(Chunk & chunk) { LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", num_blocks_processed, @@ -65,7 +68,7 @@ void MergeTreeSink::consume(Chunk chunk) if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -80,6 +83,30 @@ void MergeTreeSink::consume(Chunk chunk) size_t streams = 0; bool support_parallel_write = false; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info) + { + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + 
LOG_DEBUG(storage.log, + "dedup token from hash is caclulated"); + } + } + for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -99,6 +126,11 @@ void MergeTreeSink::consume(Chunk chunk) current_block.block.clear(); current_block.partition.clear(); + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. if (!temp_part.part) @@ -107,19 +139,6 @@ void MergeTreeSink::consume(Chunk chunk) if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; - String block_dedup_token; - if (storage.getDeduplicationLog()) - { - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - } - size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -151,6 +170,7 @@ void MergeTreeSink::consume(Chunk chunk) partitions = DelayedPartitions{}; } + /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..4e1ca5c1f60 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void 
onStart() override; void onFinish() override; @@ -35,13 +35,13 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. struct DelayedChunk; std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2bb9aad1e53..ce140c93cbe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -253,12 +254,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); const auto & settings = context->getSettingsRef(); @@ -284,13 +285,40 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + const auto async_insert_info_ptr = chunk.getChunkInfos().get(); + if (async_insert_info_ptr) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } + String block_dedup_token; + if constexpr 
(!async_insert) + { + auto token_info = chunk.getChunkInfos().get(); + if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + + if (token_info) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = token_info->getToken(); + + LOG_DEBUG(storage.log, + "dedup token from insert deduplication token in chunk: {}", + block_dedup_token); + } + else + { + LOG_DEBUG(storage.log, + "dedup token from hash is caclulated"); + } + } + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -342,23 +370,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) } else { - if (deduplicate) { - String block_dedup_token; - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. 
- - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -366,6 +381,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } + + if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + { + children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } } profile_events_scope.reset(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..b1eff67d845 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -139,6 +139,7 @@ private: /// We can delay processing for previous chunk and start writing a new one. 
std::unique_ptr delayed_chunk; + void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 4fb81d69070..36899011e33 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk chunk) +void MessageQueueSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index b3c1e61734f..4a9248c6c4d 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 0b88a9e8929..9c6d70f2c5b 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 09b009b26d8..ee2570756ed 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr 
PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk chunk) +void PartitionedSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(std::move(partition_index_to_chunk[partition_index])); + sink->consume(partition_index_to_chunk[partition_index]); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index 68edeb6fd73..fcd67556dc9 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index ba3cc6f58d0..57c8d24ccc2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true); + InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 2bb1e2dde0d..4a5a621aa43 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ 
b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context); + InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e4b19992151..5bf5ab9b2f5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c451cfd1bf5..1f7f6939f40 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk chunk) +void EmbeddedRocksDBSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 011322df829..2e1e0c7b429 100644 --- 
a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 01417b8977b..47e1b8feb43 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -310,7 +310,8 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c3a772e532c..12abd7a9849 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -498,7 +498,7 @@ bool StorageS3Queue::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 9c551e82a99..5dc407bf86d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -600,12 +600,12 @@ public: String getName() const override { return "StorageAzureBlobSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { 
std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..d4defd92196 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -605,7 +605,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1018,7 +1018,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; + InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7b5916c0273..0478936fdfc 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1047,7 +1047,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context); + InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 76d75a368b3..581e0f87f15 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1770,12 +1770,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk chunk) 
override + void consume(Chunk & chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..c80e799a92b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,7 +1248,10 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - sink->consume(Chunk{block.getColumns(), block.rows()}); + { + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); + } sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25c48de94e1..fad31e8ae03 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -337,7 +337,7 @@ public: } } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -388,9 +388,9 @@ private: }; -void LogSink::consume(Chunk chunk) +void LogSink::consume(Chunk & chunk) { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..b1bd7053c2e 100644 --- 
a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 62a2a048642..e0818fafae9 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,12 +106,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..2a8a7bd2ee7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 9379cb5a1c6..c99de3e3588 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -163,9 +163,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { 
- auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 83bb3c606c9..1a275320f43 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk chunk) +void RedisSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,7 +567,8 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + Chunk chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9768653f3fe..7975b42ac02 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1100,12 +1100,12 @@ public: String getName() const override { return "StorageS3Sink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp 
index 179e4cee199..85417a2f2a4 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..4105e8decd3 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk chunk) +void SetOrJoinSink::consume(Chunk & chunk) { - Block block = getHeader().cloneWithColumns(chunk.detachColumns()); + Block block = getHeader().cloneWithColumns(chunk.getColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 48389dccf48..7fa5a5670a3 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -217,9 +217,9 @@ public: } } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8a71a771367..c21d24ac2e5 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -559,12 +559,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk chunk) +void StorageURLSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - 
writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5aca3df1513..e90585c79ca 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 7afa1894a64..d295bebe615 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a9ec1f6c694..e0f3b437af7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -305,7 +305,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,7 +690,7 @@ inline void 
StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext()); + InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 3cd29247910..87c48a73513 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -49,19 +49,40 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin return __format(template, **params) -def instance_insert_statement(table_name, count, insert_unique_blocks, use_insert_token): - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", - insert_settings="" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'", - ) +def instance_insert_statement(table_name, count, insert_method, insert_unique_blocks, use_insert_token): + insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + + if insert_method == 'InsertSelect': + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_settings=insert_settings, + ) + + else: + template = """ + INSERT INTO {table_name} + {insert_settings} VALUES {insert_values}; + """ + + values = [] + 
for i in range(count): + values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + insert_values = ", ".join(values) + + return __format( + template, + table_name=table_name, + insert_settings=insert_settings, + insert_values=insert_values, + ) def get_drop_tables_statements(tables): @@ -109,6 +130,10 @@ class ArgsFactory: def add_opt_uniq_blocks(self): self.__parser.add_argument("--insert-unique-blocks", type=str2bool, nargs='?', const=True, default=True) + def add_opt_insert_method(self): + self.__parser.add_argument( + "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + def add_all(self): self.add_opt_engine() self.add_opt_user_token() @@ -116,6 +141,7 @@ class ArgsFactory: self.add_opt_dedup_src() self.add_opt_dedup_dst() self.add_opt_get_logs() + self.add_opt_insert_method() self.add_opt_uniq_blocks() @@ -151,14 +177,14 @@ def test_insert_several_blocks(parser): drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) print_details_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - + SELECT 'table_when_b_even'; SELECT 'count', count() FROM table_when_b_even; {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} @@ -209,37 +235,37 @@ def test_insert_several_blocks(parser): script = f""" {get_logs_statement(args)} - + SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET 
min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_a_b_statement} - + {create_table_when_b_even_statement} - + {create_mv_statement} - + -- first insert {insert_statement} - + {print_details_statements} - + {assert_first_insert_statements} - + -- second insert, it is retry {insert_statement} - + {print_details_statements} - + {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -279,13 +305,13 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_unique_blocks, args.use_insert_token + "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_a_b'; SELECT 'count', count() FROM table_a_b; - + SELECT 'table_when_b_even_and_joined'; SELECT 'count', count() FROM table_when_b_even_and_joined; {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} @@ -295,7 +321,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != 5 ) FROM table_a_b; - + SELECT throwIf( count() != 47 ) FROM table_when_b_even_and_joined; """ @@ -311,7 +337,7 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ @@ -326,14 +352,14 @@ def test_mv_generates_several_blocks(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - + SELECT throwIf( count() != {9 
if args.deduplicate_dst_table else 90} ) FROM table_when_b_even_and_joined; """ @@ -344,13 +370,13 @@ def test_mv_generates_several_blocks(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + CREATE TABLE table_for_join_with (a_join String, b UInt64) ENGINE = MergeTree() @@ -359,13 +385,13 @@ def test_mv_generates_several_blocks(parser): SELECT 'joined_' || toString(number), number FROM numbers(9); {details_print_for_table_for_join_with} - + {create_table_a_b_statement} SYSTEM STOP MERGES table_a_b; - + {create_table_when_b_even_and_joined_statement} SYSTEM STOP MERGES table_when_b_even_and_joined; - + CREATE MATERIALIZED VIEW mv_b_even TO table_when_b_even_and_joined AS @@ -377,20 +403,20 @@ def test_mv_generates_several_blocks(parser): -- first insert {insert_statement} - + {details_print_statements} - + -- first assertion {assert_first_insert_statements} - + -- second insert {insert_statement} - + {details_print_statements} - + -- second assertion {assert_second_insert_statements} - + {drop_tables_statements} """ @@ -423,12 +449,12 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_unique_blocks, args.use_insert_token + "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token ) details_print_statements = f""" SELECT 'table_src count', count() FROM table_src; - + SELECT 'table_dst count', count() FROM table_dst; {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} """ @@ -453,7 +479,7 @@ def test_several_mv_into_one_table(parser): assert_first_insert_statements = f""" SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) FROM 
table_src; - + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ @@ -469,7 +495,7 @@ def test_several_mv_into_one_table(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) FROM table_src; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 16} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) FROM table_dst; """ assert_second_insert_statements = f""" @@ -486,13 +512,13 @@ def test_several_mv_into_one_table(parser): SET max_insert_threads={1 if args.single_thread else 10}; SET update_insert_deduplication_token_in_dependent_materialized_views=1; SET deduplicate_blocks_in_dependent_materialized_views=1; - + SET max_block_size=1; SET min_insert_block_size_rows=0; SET min_insert_block_size_bytes=0; - + {drop_tables_statements} - + {create_table_src_statement} {create_table_dst_statement} @@ -503,7 +529,7 @@ def test_several_mv_into_one_table(parser): SELECT a, b FROM table_src WHERE b % 2 = 0; - + CREATE MATERIALIZED VIEW mv_b_even_even TO table_dst AS @@ -515,16 +541,16 @@ def test_several_mv_into_one_table(parser): {insert_statement} {details_print_statements} - + {assert_first_insert_statements} -- second insert, retry {insert_statement} - + {details_print_statements} {assert_second_insert_statements} - + {drop_tables_statements} """ diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 35b2642a4d2..9b4738ce805 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -14,7 +14,7 @@ count 5 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -29,7 +29,7 @@ count 10 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -44,7 +44,7 @@ count 10 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -59,7 +59,7 @@ count 20 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -74,7 +74,7 @@ count 5 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -89,7 +89,7 @@ count 10 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -104,7 +104,7 @@ count 10 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -119,35 +119,35 @@ count 20 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect 
engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -155,7 +155,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -163,7 +163,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -178,7 +178,7 @@ count 10 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -193,7 +193,7 @@ count 20 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -208,7 +208,7 @@ count 5 0 OK -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -223,7 +223,7 @@ count 1 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -238,7 +238,7 @@ count 10 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -253,7 +253,7 @@ count 20 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -268,7 +268,7 @@ count 5 0 OK -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -283,7 +283,7 @@ count 1 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -298,7 +298,7 @@ count 10 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -313,7 +313,7 @@ count 20 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -328,7 +328,7 @@ count 5 0 OK -Test case 25: 
engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -343,7 +343,7 @@ count 1 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -358,7 +358,7 @@ count 10 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -373,7 +373,7 @@ count 20 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -388,7 +388,7 @@ count 5 0 OK -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 
table_when_b_even @@ -403,7 +403,7 @@ count 1 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -418,7 +418,7 @@ count 10 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -433,7 +433,7 @@ count 20 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -448,7 +448,7 @@ count 5 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -463,7 +463,7 @@ count 10 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -478,7 +478,7 @@ count 10 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -493,7 +493,7 @@ count 20 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -508,7 +508,7 @@ count 5 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -523,7 +523,7 @@ count 10 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -538,7 +538,7 @@ count 10 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -553,35 +553,35 @@ count 20 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even count 1 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even count 5 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even count 10 EXPECTED_TO_FAIL -Test case 44: 
engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -589,7 +589,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -597,7 +597,7 @@ count 1 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -612,7 +612,7 @@ count 10 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -627,7 +627,7 @@ count 20 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -642,7 +642,7 @@ count 5 0 OK -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -657,7 +657,7 @@ count 1 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -672,7 +672,7 @@ count 10 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -687,7 +687,7 @@ count 20 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -702,7 +702,7 @@ count 5 0 OK -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -717,7 +717,7 @@ count 1 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -732,7 +732,7 @@ count 10 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -747,7 +747,7 @@ count 20 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -762,7 +762,7 @@ count 5 0 OK -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 
table_when_b_even @@ -777,7 +777,7 @@ count 1 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -792,7 +792,7 @@ count 10 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even @@ -807,7 +807,7 @@ count 20 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -822,7 +822,7 @@ count 5 0 OK -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even @@ -837,7 +837,7 @@ count 1 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 10 table_when_b_even @@ -852,7 +852,967 @@ count 10 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED 
+ +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False 
+table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 
10 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 
10 +table_when_b_even +count 20 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b 
+count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 127: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 10 table_when_b_even diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh index 5b07f6033ad..ed50110b7eb 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh @@ -15,7 +15,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # fails, it is a error. The same situation as first one, but on dst table. RUN_ONLY="" -#RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" KNOWN_ERRORS=(8 9 10 11 12 13) @@ -23,7 +23,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -31,56 +31,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - 
THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT 
--max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index eccdbd52f37..4411bdecea8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -14,7 +14,7 @@ count 47 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -29,7 +29,7 @@ count 45 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -44,7 +44,7 @@ count 94 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -59,7 +59,7 @@ count 90 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -74,7 +74,7 @@ count 47 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -89,7 +89,7 @@ count 45 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -104,7 +104,7 @@ count 94 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -119,35 +119,35 @@ count 90 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -155,7 +155,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -163,7 +163,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -178,7 +178,7 @@ count 94 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -193,7 +193,7 @@ count 90 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -201,7 +201,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -216,7 +216,7 @@ count 9 0 OK -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -231,7 +231,7 @@ count 94 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -246,7 +246,7 @@ count 90 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect 
engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -254,7 +254,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -269,7 +269,7 @@ count 9 0 OK -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -284,7 +284,7 @@ count 94 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -299,7 +299,7 @@ count 90 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -307,7 +307,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -322,7 +322,7 @@ count 9 0 OK -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -337,7 +337,7 @@ count 94 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -352,7 +352,7 @@ count 90 0 OK -Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -360,7 +360,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 
table_when_b_even_and_joined @@ -375,7 +375,7 @@ count 9 0 OK -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -390,7 +390,7 @@ count 94 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -405,7 +405,7 @@ count 90 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -420,7 +420,7 @@ count 47 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -435,7 +435,7 @@ count 45 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -450,7 +450,7 @@ count 94 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -465,7 +465,7 @@ count 90 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -480,7 +480,7 @@ count 47 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -495,7 +495,7 @@ count 45 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -510,7 +510,7 @@ count 94 0 OK -Test case 39: 
engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -525,35 +525,35 @@ count 90 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 10 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 9 EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 1 table_when_b_even_and_joined count 47 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined count 45 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -561,7 +561,7 @@ count 10 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -569,7 +569,7 @@ count 9 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -584,7 +584,7 @@ count 94 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -599,7 +599,7 @@ count 90 0 OK -Test case 48: engine=ReplicatedMergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -607,7 +607,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -622,7 +622,7 @@ count 9 0 OK -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -637,7 +637,7 @@ count 94 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -652,7 +652,7 @@ count 90 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -660,7 +660,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -675,7 +675,7 @@ count 9 0 OK -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -690,7 +690,7 @@ count 94 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -705,7 +705,7 @@ count 90 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -713,7 +713,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree 
use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -728,7 +728,7 @@ count 9 0 OK -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -743,7 +743,7 @@ count 94 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 1 table_when_b_even_and_joined @@ -758,7 +758,7 @@ count 90 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -766,7 +766,7 @@ count 14 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined @@ -781,7 +781,7 @@ count 9 0 OK -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined @@ -796,7 +796,911 @@ count 94 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined 
+count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 
+0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 79: insert_method=InsertValues 
engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 
+table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 
92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 
+table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 14 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh index 1dd648583c6..61996905135 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh @@ -29,7 +29,7 @@ function 
is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -37,56 +37,61 @@ function is_known_error() } i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + 
continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done diff --git 
a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index 12eea604e3a..a56f7deb744 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -1,5 +1,5 @@ -Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -10,7 +10,7 @@ table_dst count 6 0 OK -Test case 1: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -21,7 +21,7 @@ table_dst count 16 0 OK -Test case 2: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -32,7 +32,7 @@ table_dst count 12 0 OK -Test case 3: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst 
count 16 0 @@ -43,7 +43,7 @@ table_dst count 32 0 OK -Test case 4: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -54,7 +54,7 @@ table_dst count 6 0 OK -Test case 5: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -65,7 +65,7 @@ table_dst count 16 0 OK -Test case 6: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -76,7 +76,7 @@ table_dst count 12 0 OK -Test case 7: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -87,39 +87,39 @@ table_dst count 32 0 OK -Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 14: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -130,7 +130,7 @@ table_dst count 12 0 OK -Test case 15: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -141,13 +141,13 @@ table_dst count 32 0 OK -Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -157,7 +157,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 18: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -168,7 +168,7 @@ table_dst count 12 0 OK -Test case 19: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -179,13 +179,13 @@ table_dst count 32 0 OK -Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 21: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -195,7 +195,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 22: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -206,7 +206,7 @@ table_dst count 12 0 OK -Test case 23: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -217,13 +217,13 @@ table_dst count 32 0 OK -Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -233,7 +233,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 26: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -244,7 +244,7 @@ table_dst count 12 0 OK -Test case 27: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -255,13 +255,13 @@ table_dst count 32 0 OK -Test case 28: 
engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -271,7 +271,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 30: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -282,7 +282,7 @@ table_dst count 12 0 OK -Test case 31: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -293,7 +293,7 @@ table_dst count 32 0 OK -Test case 32: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -304,7 +304,7 @@ table_dst count 6 0 OK -Test case 33: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -315,7 +315,7 @@ table_dst count 16 0 OK -Test case 34: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -326,7 +326,7 @@ table_dst count 12 0 OK -Test case 35: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -337,7 +337,7 @@ table_dst count 32 0 OK -Test case 36: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -348,7 +348,7 @@ table_dst count 6 0 OK -Test case 37: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False +Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -359,7 +359,7 @@ table_dst count 16 0 OK -Test case 38: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -370,7 +370,7 @@ table_dst count 12 0 OK -Test case 39: engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -381,39 +381,39 @@ table_dst count 32 0 OK -Test case 40: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 1 table_dst count 2 EXPECTED_TO_FAIL -Test case 41: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 2 
EXPECTED_TO_FAIL -Test case 42: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 1 table_dst count 6 EXPECTED_TO_FAIL -Test case 43: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 EXPECTED_TO_FAIL -Test case 44: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 45: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 2 0 EXPECTED_TO_FAIL -Test case 46: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -424,7 +424,7 @@ table_dst count 12 0 OK -Test case 47: engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 @@ -435,13 +435,13 @@ table_dst count 32 0 OK -Test case 48: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 49: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -451,7 +451,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 50: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -462,7 +462,7 @@ table_dst count 12 0 OK -Test case 51: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False +Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -473,13 +473,13 @@ table_dst count 32 0 OK -Test case 52: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 53: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -489,7 +489,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 54: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -500,7 +500,7 @@ table_dst count 12 0 OK -Test case 55: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 
table_dst count 16 0 @@ -511,13 +511,13 @@ table_dst count 32 0 OK -Test case 56: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 57: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 table_dst count 1 0 @@ -527,7 +527,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 58: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -538,7 +538,7 @@ table_dst count 12 0 OK -Test case 59: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_src count 1 table_dst count 16 0 @@ -549,13 +549,13 @@ table_dst count 32 0 OK -Test case 60: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +Test case 
60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 table_dst count 4 0 EXPECTED_TO_FAIL -Test case 61: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 table_dst count 1 0 @@ -565,7 +565,7 @@ table_dst count 1 0 EXPECTED_TO_FAIL -Test case 62: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 table_dst count 6 0 @@ -576,7 +576,663 @@ table_dst count 12 0 OK -Test case 63: engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True 
+table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst 
count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 87: 
insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED + +Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED + +Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED + +Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED + +Test case 108: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED + +Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED + +Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 115: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 1 +0 +0 +table_src count 1 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 122: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 4 +0 +EXPECTED_TO_FAIL + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 1 +0 +0 +table_src count 16 +table_dst count 1 +0 +EXPECTED_TO_FAIL + +Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False table_src count 8 table_dst count 16 0 diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh index 487b3ac5f88..3d2814ed77d 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ 
b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh @@ -32,7 +32,7 @@ function is_known_error() { n=$1 for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ]; then + if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then return 0 fi done @@ -40,59 +40,64 @@ function is_known_error() } RUN_ONLY="" -#RUN_ONLY="Test case 0: engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" i=0 -for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do +for insert_method in "InsertSelect" "InsertValues"; do + for engine in "MergeTree" "ReplicatedMergeTree"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do - THIS_RUN="Test case $i:" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$engine" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + 
THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) + is_error=$(is_known_error "$i" && echo Y || echo N) + i=$((i+1)) - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi + if [ "$is_error" = Y ]; then + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL + else + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + 
$(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + fi + done done done done From ac27860b49c45cb0861d3989171be493140b0eb5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 20 Apr 2024 15:58:25 +0000 Subject: [PATCH 049/273] Automatic style fix --- src/Processors/Sinks/SinkToStorage.cpp | 3 +- src/Processors/Sinks/SinkToStorage.h | 21 +-- .../Transforms/NumberBlocksTransform.h | 9 +- src/Storages/MergeTree/MergeTreeSink.cpp | 9 +- src/Storages/MergeTree/MergeTreeSink.h | 1 - .../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +- .../MergeTree/ReplicatedMergeTreeSink.h | 1 - .../0_stateless/03008_deduplication.python | 132 ++++++++++++++---- 8 files changed, 128 insertions(+), 56 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 146bd4505a4..fff4a881e3d 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,9 +15,8 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - setDeduplicationTokenForChildren(chunk); - fillDeduplicationTokenForChildren(chunk); consume(chunk); + fillDeduplicationTokenForChildren(chunk); if (!lastBlockIsDuplicate()) // TODO: remove that cur_chunk = std::move(chunk); } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 07a944b0943..21e003c4317 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -25,24 +25,12 @@ protected: virtual void consume(Chunk & chunk) = 0; virtual bool lastBlockIsDuplicate() const { return 
false; } - virtual std::shared_ptr setDeduplicationTokenForChildren(Chunk & chunk) const + void fillDeduplicationTokenForChildren(Chunk & chunk) const { auto token_info = chunk.getChunkInfos().get(); if (token_info) - return token_info; + return; - auto block_dedup_token_for_children = std::make_shared(""); - chunk.getChunkInfos().add(block_dedup_token_for_children); - return block_dedup_token_for_children; - } - - virtual std::shared_ptr getDeduplicationTokenForChildren(Chunk & chunk) const - { - return chunk.getChunkInfos().get(); - } - - virtual void fillDeduplicationTokenForChildren(Chunk & chunk) const - { SipHash hash; for (const auto & colunm: chunk.getColumns()) { @@ -50,8 +38,9 @@ protected: } const auto hash_value = hash.get128(); - chunk.getChunkInfos().get()->addTokenPart( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]))); + chunk.getChunkInfos().add(std::make_shared( + fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) + )); } private: diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index ca990a925c1..9bc23a583d3 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -49,6 +49,7 @@ namespace DB class DedupTokenInfo : public ChunkInfoCloneable { public: + DedupTokenInfo() = default; DedupTokenInfo(const DedupTokenInfo & other) = default; explicit DedupTokenInfo(String first_part) { @@ -68,9 +69,15 @@ namespace DB return result; } + bool empty() const + { + return token_parts.empty(); + } + void addTokenPart(String part) { - token_parts.push_back(std::move(part)); + if (!part.empty()) + token_parts.push_back(std::move(part)); } private: diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2e455cd2bd5..ce7833d25da 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -84,6 +85,7 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if (storage.getDeduplicationLog()) { auto token_info = chunk.getChunkInfos().get(); @@ -102,6 +104,9 @@ void MergeTreeSink::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); + LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -126,9 +131,9 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } /// If optimize_on_insert setting is true, current_block could become empty after merge diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 4e1ca5c1f60..8f065773d6a 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -41,7 +41,6 @@ private: struct DelayedChunk; std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(); }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index ce140c93cbe..1712170dddd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -293,6 +294,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } 
String block_dedup_token; + std::shared_ptr dedub_token_info_for_children = nullptr; if constexpr (!async_insert) { auto token_info = chunk.getChunkInfos().get(); @@ -314,6 +316,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + dedub_token_info_for_children = std::make_shared(); + chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, "dedup token from hash is caclulated"); } @@ -382,9 +386,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (auto children_dedup_token = getDeduplicationTokenForChildren(chunk)) + if (dedub_token_info_for_children) { - children_dedup_token->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index b1eff67d845..e460804d7f1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -139,7 +139,6 @@ private: /// We can delay processing for previous chunk and start writing a new one. 
std::unique_ptr delayed_chunk; - void fillDeduplicationTokenForChildren(Chunk &) const override { /* For MergeTree we get the tokens from part checksums */ } void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); }; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 87c48a73513..89dbea97667 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -22,7 +22,14 @@ def __format(template, **params): return template.format(**kv_args) -def instance_create_statement(table_name, table_columns, table_keys, table_engine, with_deduplication, no_merges=True): +def instance_create_statement( + table_name, + table_columns, + table_keys, + table_engine, + with_deduplication, + no_merges=True, +): template = """ CREATE TABLE {table_name} {table_columns} @@ -37,22 +44,36 @@ def instance_create_statement(table_name, table_columns, table_keys, table_engin params["table_columns"] = table_columns params["table_keys"] = table_keys params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = "MergeTree()" if table_engine == "MergeTree" else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + params["table_engine"] = ( + "MergeTree()" + if table_engine == "MergeTree" + else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + ) - deduplication_window_setting_name = "non_replicated_deduplication_window" if table_engine == "MergeTree" else "replicated_deduplication_window" + deduplication_window_setting_name = ( + "non_replicated_deduplication_window" + if table_engine == "MergeTree" + else "replicated_deduplication_window" + ) deduplication_window_setting_value = 1000 if with_deduplication else 0 settings = list() - settings += [f"{deduplication_window_setting_name}={deduplication_window_setting_value}"] + settings += [ + 
f"{deduplication_window_setting_name}={deduplication_window_setting_value}" + ] params["table_settings"] = "SETTINGS " + ",".join(settings) return __format(template, **params) -def instance_insert_statement(table_name, count, insert_method, insert_unique_blocks, use_insert_token): - insert_settings = "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" +def instance_insert_statement( + table_name, count, insert_method, insert_unique_blocks, use_insert_token +): + insert_settings = ( + "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + ) - if insert_method == 'InsertSelect': + if insert_method == "InsertSelect": template = """ INSERT INTO {table_name} SELECT {insert_columns} @@ -62,7 +83,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl template, table_name=table_name, count=count, - insert_columns="'src_4', 4" if not insert_unique_blocks else "'src_' || toString(number), number", + insert_columns="'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number", insert_settings=insert_settings, ) @@ -74,7 +97,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl values = [] for i in range(count): - values += [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + values += ( + [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + ) insert_values = ", ".join(values) return __format( @@ -86,7 +111,9 @@ def instance_insert_statement(table_name, count, insert_method, insert_unique_bl def get_drop_tables_statements(tables): - return "".join([f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]]) + return "".join( + [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] + ) def get_logs_statement(args): @@ -94,15 +121,17 @@ def get_logs_statement(args): return "SET send_logs_level='test';" return "" + def str2bool(v): if isinstance(v, bool): return v - if v.lower() in 
('yes', 'true', 't', 'y', '1'): + if v.lower() in ("yes", "true", "t", "y", "1"): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + elif v.lower() in ("no", "false", "f", "n", "0"): return False else: - raise argparse.ArgumentTypeError('Boolean value expected.') + raise argparse.ArgumentTypeError("Boolean value expected.") + class ArgsFactory: def __init__(self, parser): @@ -110,29 +139,55 @@ class ArgsFactory: def add_opt_engine(self): self.__parser.add_argument( - "--table-engine", choices=["ReplicatedMergeTree", "MergeTree"], default="MergeTree") + "--table-engine", + choices=["ReplicatedMergeTree", "MergeTree"], + default="MergeTree", + ) def add_opt_user_token(self): - self.__parser.add_argument("--use-insert-token", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--use-insert-token", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_single_thread(self): - self.__parser.add_argument("--single-thread", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--single-thread", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_dedup_src(self): - self.__parser.add_argument("--deduplicate-src-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-src-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_dedup_dst(self): - self.__parser.add_argument("--deduplicate-dst-table", type=str2bool, nargs='?', const=True, default=True) + self.__parser.add_argument( + "--deduplicate-dst-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) def add_opt_get_logs(self): - self.__parser.add_argument("--get-logs", type=str2bool, nargs='?', const=True, default=False) + self.__parser.add_argument( + "--get-logs", type=str2bool, nargs="?", const=True, default=False + ) def add_opt_uniq_blocks(self): - self.__parser.add_argument("--insert-unique-blocks", type=str2bool, 
nargs='?', const=True, default=True) + self.__parser.add_argument( + "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True + ) def add_opt_insert_method(self): self.__parser.add_argument( - "--insert-method", choices=["InsertSelect", "InsertValues"], default="InsertSelect") + "--insert-method", + choices=["InsertSelect", "InsertValues"], + default="InsertSelect", + ) def add_all(self): self.add_opt_engine() @@ -174,10 +229,16 @@ def test_insert_several_blocks(parser): WHERE b % 2 = 0; """ - drop_tables_statements = get_drop_tables_statements( ["table_a_b", "table_when_b_even", "mv_b_even"] ) + drop_tables_statements = get_drop_tables_statements( + ["table_a_b", "table_when_b_even", "mv_b_even"] + ) insert_statement = instance_insert_statement( - "table_a_b", 10, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 10, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) print_details_statements = f""" @@ -190,8 +251,6 @@ def test_insert_several_blocks(parser): {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} """ - - if args.insert_unique_blocks: assert_first_insert_statements = f""" SELECT throwIf( count() != 10 ) @@ -278,7 +337,12 @@ def test_mv_generates_several_blocks(parser): ArgsFactory(parser).add_all() def calle(args): - tables = ["table_for_join_with", "table_a_b", "table_when_b_even_and_joined", "mv_b_even"] + tables = [ + "table_for_join_with", + "table_a_b", + "table_when_b_even_and_joined", + "mv_b_even", + ] drop_tables_statements = get_drop_tables_statements(tables) details_print_for_table_for_join_with = "" @@ -305,7 +369,11 @@ def test_mv_generates_several_blocks(parser): ) insert_statement = instance_insert_statement( - "table_a_b", 5, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_a_b", + 5, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) 
details_print_statements = f""" @@ -449,7 +517,11 @@ def test_several_mv_into_one_table(parser): ) insert_statement = instance_insert_statement( - "table_src", 8, args.insert_method, args.insert_unique_blocks, args.use_insert_token + "table_src", + 8, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, ) details_print_statements = f""" @@ -568,9 +640,7 @@ def parse_args(): test_mv_generates_several_blocks( subparsers.add_parser("mv_generates_several_blocks") ) - test_several_mv_into_one_table( - subparsers.add_parser("several_mv_into_one_table") - ) + test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) args = parser.parse_args() if args.test is None: parser.print_help() From c7908f62d056c5a96f0bea743de48e48399c0f91 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:06:04 +0200 Subject: [PATCH 050/273] fix sigfault --- src/Storages/MergeTree/MergeTreeSink.cpp | 11 ++++++----- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ce7833d25da..7ca6ed10a76 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -131,16 +131,17 @@ void MergeTreeSink::consume(Chunk & chunk) current_block.block.clear(); current_block.partition.clear(); - if (dedub_token_info_for_children) - { - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); - } - /// If optimize_on_insert setting is true, current_block could become empty after merge /// and we didn't create part. 
if (!temp_part.part) continue; + if (dedub_token_info_for_children) + { + chassert(temp_part.part); + dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + } + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 1712170dddd..3c1e2bc9219 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -388,6 +388,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (dedub_token_info_for_children) { + chassert(temp_part.part); dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); } } From 687b5940fa37563f2121ab2f915edd513a2c8b6e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 14:45:48 +0200 Subject: [PATCH 051/273] fix style --- src/Interpreters/InterpreterCreateQuery.cpp | 6 ++---- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++++-- src/QueryPipeline/QueryPlanResourceHolder.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++++++- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a143ca867e1..35ed6c9ab69 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1691,14 +1691,12 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert->select = create.select->clone(); return InterpreterInsertQuery( - insert, + insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, false, false, - false - ) - .execute(); + false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp 
b/src/Interpreters/InterpreterInsertQuery.cpp index 40d5a84031d..62bffcfe6a1 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -592,10 +592,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() { pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { return std::make_shared(in_header); }); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index e40fa04f72c..10f7f39ab09 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -19,7 +19,7 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. 
diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7ca6ed10a76..2d29f87c556 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -13,6 +13,11 @@ namespace ProfileEvents extern const Event DuplicatedInsertedBlocks; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -108,7 +113,7 @@ void MergeTreeSink::consume(Chunk & chunk) chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3c1e2bc9219..e855bb7d969 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -319,7 +319,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) dedub_token_info_for_children = std::make_shared(); chunk.getChunkInfos().add(dedub_token_info_for_children); LOG_DEBUG(storage.log, - "dedup token from hash is caclulated"); + "dedup token from hash is calculated"); } } From f1493a40a4b70251b12bc06bdd43bbd9eeadfe8b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 15:44:30 +0200 Subject: [PATCH 052/273] fix tests --- src/Interpreters/AsynchronousInsertQueue.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 65035790729..e1595243ae3 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -780,7 +780,12 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, true, false, false); + key.query, + insert_context, + key.settings.insert_allow_materialized_columns, + false, + false, + true); pipeline = 
interpreter->execute().pipeline; chassert(pipeline.pushing()); From fa667b454366c724dcfa59872de4d89c987fa5bd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 1 May 2024 18:21:32 +0200 Subject: [PATCH 053/273] fix tests --- src/Interpreters/InterpreterCheckQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index e070d8694a7..81bb6290acb 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Processors/Chunk.h" #include #include @@ -24,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 62bffcfe6a1..bbff38a06bf 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include "Interpreters/Context_fwd.h" namespace ProfileEvents @@ -682,6 +682,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline() chain.addSource(std::move(counting)); QueryPipeline pipeline = QueryPipeline(std::move(chain)); + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); pipeline.setConcurrencyControl(settings.use_concurrency_control); @@ -735,7 +736,11 @@ BlockIO InterpreterInsertQuery::execute() { if (settings.parallel_distributed_insert_select) { - res.pipeline = *table->distributedWrite(query, getContext()); + auto distributed = table->distributedWrite(query, getContext()); + if (distributed) + res.pipeline = std::move(*distributed); + else + res.pipeline = buildInsertSelectPipeline(); } else { From 2d4216ecada4593ba43af441c6fd44f694e1867d Mon Sep 17 00:00:00 2001 From: 
Sema Checherinda Date: Thu, 2 May 2024 13:00:57 +0200 Subject: [PATCH 054/273] debugging --- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++++++++++ src/Storages/StorageDistributed.cpp | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index bbff38a06bf..435c616f27c 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -499,6 +499,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } + + // auto resources = QueryPlanResourceHolder(); + // resources.interpreter_context.push_back(select_context); + // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); @@ -710,6 +714,12 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + if (auto * dist_storage = dynamic_cast(table.get())) + { + LOG_DEBUG(getLogger("InsertQuery"), + "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + } + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0478936fdfc..1ecb83aa120 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,6 +23,7 @@ #include +#include "Common/logger_useful.h" #include #include #include @@ -106,6 +107,7 @@ #include #include +#include #include #include #include @@ -1822,10 +1824,14 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void 
StorageDistributed::delayInsertOrThrowIfNeeded() const { + LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); + if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; + LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); + UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From db6951e48897d297f443dfb85f8da76add6e4f0d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 2 May 2024 15:43:49 +0200 Subject: [PATCH 055/273] fix distributed inserts --- src/Common/CollectionOfDerived.h | 14 ++++++++++ src/Interpreters/InterpreterInsertQuery.cpp | 26 ++++++++++++------- src/Interpreters/InterpreterInsertQuery.h | 4 +-- .../FinishAggregatingInOrderAlgorithm.cpp | 11 +++++--- src/Storages/StorageDistributed.cpp | 4 --- 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 8579c4dd50c..c98e375b4b1 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -109,6 +110,19 @@ public: return cast; } + std::string debug() const + { + std::string result; + + for (auto & rec : records) + { + result.append(rec.type_idx.name()); + result.append(" "); + } + + return result; + } + private: bool isUniqTypes() const { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 435c616f27c..fdf77486c85 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -429,12 +429,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP } -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr 
table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -641,12 +639,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline() } -QueryPipeline InterpreterInsertQuery::buildInsertPipeline() +QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) { const Settings & settings = getContext()->getSettingsRef(); - auto & query = query_ptr->as(); - StoragePtr table = getTable(query); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); @@ -714,9 +710,11 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + bool is_table_dist = false; if (auto * dist_storage = dynamic_cast(table.get())) { - LOG_DEBUG(getLogger("InsertQuery"), + is_table_dist = true; + LOG_DEBUG(getLogger("InsertQuery"), "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); } @@ -748,18 +746,26 @@ BlockIO InterpreterInsertQuery::execute() { auto distributed = table->distributedWrite(query, getContext()); if (distributed) + { + LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); + } else - res.pipeline = buildInsertSelectPipeline(); + { + LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); + } } else { - res.pipeline = buildInsertSelectPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as 
insert select, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertSelectPipeline(query, table); } } else { - res.pipeline = buildInsertPipeline(); + LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + res.pipeline = buildInsertPipeline(query, table); } res.pipeline.addStorageHolder(table); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 3f3b7a6f106..b06bb9a3db2 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -81,8 +81,8 @@ private: std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - QueryPipeline buildInsertSelectPipeline(); - QueryPipeline buildInsertPipeline(); + QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); + QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); Chain buildSink( const StoragePtr & table, diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index f33cc267c44..ae47de4a81e 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -51,11 +51,14 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & arenas_info = input.chunk.getChunkInfos().get(); - if (!arenas_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ChunkInfoWithAllocatedBytes was not set for chunk in FinishAggregatingInOrderAlgorithm"); + if (input.chunk.getChunkInfos().empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); - states[source_num] = State{input.chunk, 
description, arenas_info->allocated_bytes}; + Int64 allocated_bytes = 0; + if (auto arenas_info = input.chunk.getChunkInfos().get()) + allocated_bytes = arenas_info->allocated_bytes; + + states[source_num] = State{input.chunk, description, allocated_bytes}; } IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1ecb83aa120..747bb2c2080 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1824,14 +1824,10 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { - LOG_WARNING(log, "delayInsertOrThrowIfNeeded"); - if (!distributed_settings.bytes_to_throw_insert && !distributed_settings.bytes_to_delay_insert) return; - LOG_WARNING(log, "delayInsertOrThrowIfNeeded getContext() is null: {}", getContext() == nullptr); - UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) From 7fe1fe11b9c59eda01cb20f506f3c3e95398668d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 3 May 2024 22:43:47 +0200 Subject: [PATCH 056/273] fixing tests --- src/Columns/ColumnObject.cpp | 6 +++ src/Columns/ColumnObject.h | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++--- src/Interpreters/SquashingTransform.cpp | 20 ++++----- src/Interpreters/SquashingTransform.h | 11 +++-- src/Processors/ISimpleTransform.h | 4 ++ .../Transforms/CountingTransform.cpp | 3 ++ src/Processors/Transforms/CountingTransform.h | 4 ++ .../Transforms/ExpressionTransform.cpp | 7 ++++ .../Transforms/MaterializingTransform.cpp | 6 +++ .../Transforms/NumberBlocksTransform.h | 20 +++++++++ .../Transforms/SquashingChunksTransform.cpp | 42 +++++++++++++++---- .../Transforms/SquashingChunksTransform.h | 1 + .../Transforms/buildPushingToViewsChain.cpp | 25 
++++++----- src/Server/TCPHandler.cpp | 6 +-- src/Storages/MergeTree/MutateTask.cpp | 9 ++-- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 18 files changed, 129 insertions(+), 51 deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010c..ded56b60e64 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,4 +1093,10 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994..b1b8827622f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } + void updateHashFast(SipHash & hash) const override; void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fdf77486c85..2961d643869 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -471,8 +471,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (table->prefersLargeBlocks()) { - new_settings.max_block_size = std::max(settings.min_insert_block_size_rows, settings.max_block_size); - 
new_settings.preferred_block_size_bytes = std::max(settings.min_insert_block_size_bytes, settings.preferred_block_size_bytes); + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; } auto context_for_trivial_select = Context::createCopy(context); @@ -497,10 +499,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); pipeline = interpreter_select.buildQueryPipeline(); } - - // auto resources = QueryPlanResourceHolder(); - // resources.interpreter_context.push_back(select_context); - // pipeline.addResources(std::move(resources)); } pipeline.dropTotalsAndExtremes(); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 41f024df7a7..cf4f2060414 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -15,12 +15,12 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b { } -Block SquashingTransform::add(Block && input_block) +SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) { return addImpl(std::move(input_block)); } -Block SquashingTransform::add(const Block & input_block) +SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) { return addImpl(input_block); } @@ -32,14 +32,14 @@ Block SquashingTransform::add(const Block & input_block) * have to. */ template -Block SquashingTransform::addImpl(ReferenceType input_block) +SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. 
if (!input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } /// Just read block is already enough. @@ -48,13 +48,13 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// If no accumulated data, return just read block. if (!accumulated_block) { - return std::move(input_block); + return SquashResult{std::move(input_block), false}; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } /// Accumulated block is already enough. @@ -63,7 +63,7 @@ Block SquashingTransform::addImpl(ReferenceType input_block) /// Return accumulated data and place new block to accumulated data. Block to_return = std::move(input_block); std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), true}; } append(std::move(input_block)); @@ -71,11 +71,11 @@ Block SquashingTransform::addImpl(ReferenceType input_block) { Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return SquashResult{std::move(to_return), false}; } - /// Squashed block is not ready. - return {}; + /// Squashed block is not ready, input block consumed + return SquashResult{{}, true}; } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index b04d012bcd1..f1eba537338 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -25,11 +25,16 @@ public: /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + struct SquashResult + { + Block block; + bool input_block_delayed = false; + }; /** Add next block and possibly returns squashed block. 
* At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. */ - Block add(Block && block); - Block add(const Block & block); + SquashResult add(Block && block); + SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -38,7 +43,7 @@ private: Block accumulated_block; template - Block addImpl(ReferenceType block); + SquashResult addImpl(ReferenceType block); template void append(ReferenceType block); diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 629529cdffa..3862ea76dbb 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,6 +2,8 @@ #include +#include + namespace DB { @@ -29,6 +31,8 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { + LOG_DEBUG(getLogger("ISimpleTransform"), + "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 3dfb9fe178f..7329a196f8a 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -17,6 +17,9 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("CountingTransform"), + "onConsume {}", chunk.getNumRows()); + if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 05d8e2aeac8..ab8d083fd05 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { @@ -43,6 +45,8 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { + LOG_DEBUG(getLogger("CountingTransform"), + "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; 
res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 2fbd2c21b8d..db5d2b0c49c 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,5 +1,9 @@ #include #include + +#include + + namespace DB { @@ -17,6 +21,9 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("ExpressionTransform"), + "transform {}", chunk.getNumRows()); + size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 1eaa5458d37..8366472f876 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,6 +1,9 @@ #include #include +#include + + namespace DB { @@ -9,6 +12,9 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { + LOG_DEBUG(getLogger("MaterializingTransform"), + "transform {}", chunk.getNumRows()); + auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 9bc23a583d3..6586f015d3e 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -228,4 +228,24 @@ namespace DB String token_part; }; + class RestoreChunkInfosTransform : public ISimpleTransform + { + public: + RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , chunk_infos(chunk_infos_) + { + } + + String getName() const override { return 
"RestoreChunkInfosTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().append(chunk_infos.clone()); + } + + private: + Chunk::ChunkInfoCollection chunk_infos; + }; + } diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 4d693e5e809..7464cb79ba6 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,15 +17,24 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "onConsume {}", chunk.getNumRows()); + if (cur_chunkinfos.empty()) cur_chunkinfos = chunk.getChunkInfos(); - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - cur_chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); + } } SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() @@ -38,8 +47,8 @@ SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() void SquashingChunksTransform::onFinish() { - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + finish_chunk.setColumns(result.block.getColumns(), result.block.rows()); finish_chunk.setChunkInfos(std::move(cur_chunkinfos)); cur_chunkinfos = {}; } @@ -69,12 +78,25 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { + 
LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "transform {}", chunk.getNumRows()); + if (!finished) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + if (cur_chunkinfos.empty()) + cur_chunkinfos = chunk.getChunkInfos(); + + auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (result.block) { - chunk.setColumns(block.getColumns(), block.rows()); - chunk.setChunkInfos(chunk.getChunkInfos()); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; + } + + if (cur_chunkinfos.empty() && result.input_block_delayed) + { + cur_chunkinfos = chunk.getChunkInfos(); } } else @@ -82,8 +104,10 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); + auto result = squashing.add({}); + chunk.setColumns(result.block.getColumns(), result.block.rows()); + chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = {}; } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 6de96d4100d..116b9e47460 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -45,6 +45,7 @@ protected: private: SquashingTransform squashing; + Chunk::ChunkInfoCollection cur_chunkinfos; /// When consumption is finished we need to release the final chunk regardless of its size. 
bool finished = false; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 056f8d07627..ccecfcf3333 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -124,7 +124,6 @@ private: { QueryPipeline pipeline; PullingPipelineExecutor executor; - Chunk::ChunkInfoCollection chunk_infos; explicit State(QueryPipeline pipeline_) : pipeline(std::move(pipeline_)) @@ -397,10 +396,13 @@ std::optional generateViewChain( { out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - if (!disable_deduplication_for_children) - { - out.addSource(std::make_shared(out.getInputHeader())); - } + // if (!disable_deduplication_for_children) + // { + // // out.addSource(std::make_shared(out.getInputHeader())); + // // out.addSource(std::make_shared(out.getInputHeader())); + + // out.addSource(std::make_shared(out.getInputHeader())); + // } auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); @@ -576,7 +578,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) { const auto & context = views_data.context; @@ -623,8 +625,9 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + //pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); + 
pipeline.addTransform(std::make_shared(pipeline.getHeader())); return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -727,8 +730,7 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data)); - state->chunk_infos = chunk.getChunkInfos(); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); } @@ -746,9 +748,6 @@ ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromView break; } - // here are we copy chunk_infos to the all chunks generated from the one consumed chunk - res.chunk.getChunkInfos().append(state->chunk_infos.clone()); - if (res.is_done) state.reset(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3db935729b4..c21d230cba7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,18 +890,18 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + if (result.block) { return PushResult { .status = PushResult::TOO_MUCH_DATA, - .insert_block = std::move(result), + .insert_block = std::move(result.block), }; } } auto result = squashing.add({}); - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result.block), query_context); } void TCPHandler::processInsertQuery() diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5934756fb95..ab316947ff8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1298,7 +1298,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block projection_block; { 
ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + auto result = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + projection_block = std::move(result.block); } if (projection_block) @@ -1323,11 +1324,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); - if (projection_block) + auto squash_result = projection_squash.add({}); + if (squash_result.block) { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, std::move(squash_result.block), projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index adf6abb7298..741591b0dd4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -2,7 +2,7 @@ 3 2 -3 +2 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..0a41581025a 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO 
without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated +-- Implicit insert is deduplicated even for MV without_deduplication_mv SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 9242c78fee09962abc5a28f368887e531187b5dd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 6 May 2024 21:53:22 +0200 Subject: [PATCH 057/273] work with test --- src/Interpreters/InterpreterInsertQuery.cpp | 131 +++--- src/Processors/Sinks/SinkToStorage.cpp | 4 +- src/Processors/Sinks/SinkToStorage.h | 20 - .../Transforms/NumberBlocksTransform.cpp | 156 ++++++ .../Transforms/NumberBlocksTransform.h | 391 +++++++-------- .../Transforms/SquashingChunksTransform.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 61 +-- src/Storages/FileLog/StorageFileLog.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 21 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 28 +- .../MergeTree/ReplicatedMergeTreeSink.h | 11 - ...02912_ingestion_mv_deduplication.reference | 3 +- .../02912_ingestion_mv_deduplication.sql | 1 + ...uplication_insert_several_blocks.reference | 148 ++++-- ...tion_mv_generates_several_blocks.reference | 324 ++++++++++--- ...cation_several_mv_into_one_table.reference | 444 ++++++++++++------ 16 files changed, 1162 insertions(+), 591 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2961d643869..0f3df3752cb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -39,6 +39,7 @@ #include #include #include +#include "base/defines.h" namespace ProfileEvents @@ -398,6 +399,9 @@ Chain InterpreterInsertQuery::buildPreSinkChain( std::pair, std::vector> InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & 
metadata_snapshot, const Block & query_sample_block) { + chassert(presink_streams > 0); + chassert(sink_streams > 0); + ThreadGroupPtr running_group; if (current_thread) running_group = current_thread->getThreadGroup(); @@ -410,8 +414,8 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + "call buildSink sink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -421,6 +425,10 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { + LOG_DEBUG(getLogger("InsertQuery"), + "call buildSink presink_streams table name {}.{}, stream {}/{}", + table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -454,6 +462,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); + LOG_DEBUG(getLogger("InsertQuery"), + "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -462,9 +473,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & * to avoid unnecessary squashing. 
*/ - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select=true prefersLargeBlocks={}", table->prefersLargeBlocks()); - Settings new_settings = select_context->getSettings(); new_settings.max_threads = std::max(1, settings.max_insert_threads); @@ -503,6 +511,11 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); + LOG_DEBUG(getLogger("InsertQuery"), + "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. if (getContext()->getSettingsRef().insert_null_as_default) { @@ -532,6 +545,56 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + pipeline.resize(1); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. 
+ /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + + auto [presink_chains, sink_chains] = buildPreAndSyncChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + pipeline.resize(presink_chains.size()); + auto actions_dag = ActionsDAG::makeConvertingActions( pipeline.getHeader().getColumnsWithTypeAndName(), query_sample_block.getColumnsWithTypeAndName(), @@ -560,54 +623,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. 
- - size_t presink_streams_size = std::max(1, std::max(settings.max_insert_threads, pipeline.getNumStreams())); - size_t sink_streams_size = table->supportsParallelInsert() ? presink_streams_size : 1; - - auto [presink_chains, sink_chains] = buildPreAndSyncChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.resize(1); - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - pipeline.resize(presink_chains.size()); for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); pipeline.resize(sink_streams_size); + for (auto & chain : sink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(sink_chains)); @@ -655,12 +676,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.appendChain(std::move(sink_chains.front())); } - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - if (shouldAddSquashingFroStorage(table)) { bool table_prefers_large_blocks = table->prefersLargeBlocks(); @@ -673,6 +688,14 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query chain.addSource(std::move(squashing)); } + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + 
chain.addSource(std::make_shared(chain.getInputHeader())); + auto context_ptr = getContext(); auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); counting->setProcessListElement(context_ptr->getProcessListElement()); diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index fff4a881e3d..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -16,9 +16,7 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - fillDeduplicationTokenForChildren(chunk); - if (!lastBlockIsDuplicate()) // TODO: remove that - cur_chunk = std::move(chunk); + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 21e003c4317..c350b9f79b0 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -4,7 +4,6 @@ #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" namespace DB { @@ -23,25 +22,6 @@ public: protected: virtual void consume(Chunk & chunk) = 0; - virtual bool lastBlockIsDuplicate() const { return false; } - - void fillDeduplicationTokenForChildren(Chunk & chunk) const - { - auto token_info = chunk.getChunkInfos().get(); - if (token_info) - return; - - SipHash hash; - for (const auto & colunm: chunk.getColumns()) - { - colunm->updateHashFast(hash); - } - const auto hash_value = hash.get128(); - - chunk.getChunkInfos().add(std::make_shared( - fmt::format(":hash-{}", toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])) - )); - } private: std::vector table_locks; diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 61ff3f6bfd5..19ebf94a27a 100644 --- 
a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -1 +1,157 @@ #include + +#include + +#include +#include +#include + + +#include + + +namespace DB +{ +namespace DeduplicationToken +{ + +String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +{ + chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + + String result; + result.reserve(getTotalSize()); + + for (const auto & part : parts) + result.append(part); + + return result; +} + +void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +{ + chassert(stage == INITIAL); + addTokenPart(std::move(part)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setUserToken(const String & token) +{ + chassert(stage == INITIAL); + addTokenPart(fmt::format("user-token-{}", token)); + stage = SOURCE_BLOCK_NUMBER; +} + +void TokenInfo::setSourceBlockNumber(size_t sbn) +{ + chassert(stage == SOURCE_BLOCK_NUMBER); + addTokenPart(fmt::format(":source-number-{}", sbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::setMaterializeViewID(const String & id) +{ + chassert(stage == MATERIALIZE_VIEW_ID); + addTokenPart(fmt::format(":mv-{}", id)); + stage = MATERIALIZE_VIEW_BLOCK_NUMBER; +} + +void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +{ + chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":mv-bn-{}", mvbn)); + stage = MATERIALIZE_VIEW_ID; +} + +void TokenInfo::reset() +{ + stage = INITIAL; + parts.clear(); +} + +void TokenInfo::addTokenPart(String part) +{ + if (!part.empty()) + parts.push_back(std::move(part)); +} + +size_t TokenInfo::getTotalSize() const +{ + size_t size = 0; + for (const auto & part : parts) + size += part.size(); + return size; +} + +void CheckTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + 
+ if (!must_be_present) + { + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + return; + } + + LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); +} + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + auto token_builder = chunk.getChunkInfos().get(); + chassert(token_builder); + if (token_builder->tokenInitialized()) + return; + + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); +} + +void SetUserTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setUserToken(user_token); +} + +void SetSourceBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(!token_info->tokenInitialized()); + token_info->setSourceBlockNumber(block_number++); +} + +void SetMaterializeViewIDTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewID(mv_id); +} + +void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + chassert(token_info->tokenInitialized()); + token_info->setMaterializeViewBlockNumber(block_number++); +} + +void ResetTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + chassert(token_info); + token_info->reset(); +} + +} +} diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6586f015d3e..46b62029c21 
100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -2,10 +2,9 @@ #include #include -#include -#include -#include +#include + namespace ErrorCodes { @@ -14,220 +13,6 @@ namespace ErrorCodes namespace DB { - struct SerialBlockNumberInfo : public ChunkInfoCloneable - { - SerialBlockNumberInfo(const SerialBlockNumberInfo & other) = default; - explicit SerialBlockNumberInfo(size_t block_number_) - : block_number(block_number_) - { - } - - size_t block_number = 0; - }; - - - class NumberBlocksTransform : public ISimpleTransform - { - public: - explicit NumberBlocksTransform(const Block & header) - : ISimpleTransform(header, header, true) - { - } - - String getName() const override { return "NumberBlocksTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(block_number++)); - } - - private: - size_t block_number = 0; - }; - - - class DedupTokenInfo : public ChunkInfoCloneable - { - public: - DedupTokenInfo() = default; - DedupTokenInfo(const DedupTokenInfo & other) = default; - explicit DedupTokenInfo(String first_part) - { - addTokenPart(std::move(first_part)); - } - - String getToken() const - { - String result; - result.reserve(getTotalSize()); - - for (const auto & part : token_parts) - { - result.append(part); - } - - return result; - } - - bool empty() const - { - return token_parts.empty(); - } - - void addTokenPart(String part) - { - if (!part.empty()) - token_parts.push_back(std::move(part)); - } - - private: - size_t getTotalSize() const - { - size_t size = 0; - for (const auto & part : token_parts) - size += part.size(); - return size; - } - - std::vector token_parts; - }; - - class AddUserDeduplicationTokenTransform : public ISimpleTransform - { - public: - AddUserDeduplicationTokenTransform(String token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token(token_) - { - } - - String getName() const override { 
return "AddUserDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared(token)); - } - - private: - String token; - }; - - - class CheckInsertDeduplicationTokenTransform : public ISimpleTransform - { - public: - CheckInsertDeduplicationTokenTransform(String debug_, bool must_be_present_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(debug_) - , must_be_present(must_be_present_) - { - } - - String getName() const override { return "CheckInsertDeduplicationTokenTransform"; } - - void transform(Chunk & chunk) override - { - if (!must_be_present) - return; - - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), - "{}, token: {}", - debug, token_info->getToken()); - } - - private: - String debug; - bool must_be_present = false; - }; - - - class ExtendDeduplicationWithBlockNumberFromInfoTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberFromInfoTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, recs {}", chunk.getChunkInfos().size()); - - auto block_number_info = chunk.getChunkInfos().get(); - if (!block_number_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have SerialBlockNumberInfo as ChunkInfo"); - - token_info->addTokenPart(fmt::format(":block-{}", block_number_info->block_number)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberFromInfoTokenTransform"), - 
"updated with {}, result: {}", - fmt::format(":block-{}", block_number_info->block_number), token_info->getToken()); - } - }; - - class ExtendDeduplicationWithBlockNumberTokenTransform : public ISimpleTransform - { - public: - explicit ExtendDeduplicationWithBlockNumberTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo"); - - auto x = block_number++; - token_info->addTokenPart(fmt::format(":block-{}", x)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithBlockNumberTokenTransform"), - "updated with {}, result: {}", - fmt::format(":block-{}", x), token_info->getToken()); - } - private: - size_t block_number = 0; - }; - - class ExtendDeduplicationWithTokenPartTransform : public ISimpleTransform - { - public: - ExtendDeduplicationWithTokenPartTransform(String token_part_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , token_part(token_part_) - { - } - - String getName() const override { return "ExtendDeduplicationWithBlockNumberTokenTransform"; } - - void transform(Chunk & chunk) override - { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, try to add token part {}", token_part); - - token_info->addTokenPart(fmt::format("{}", token_part)); - - LOG_DEBUG(getLogger("ExtendDeduplicationWithTokenPartTransform"), - "updated with {}, result: {}", - token_part, token_info->getToken()); - } - - private: - String token_part; - }; - class RestoreChunkInfosTransform : public ISimpleTransform { public: @@ -248,4 +33,176 @@ namespace DB Chunk::ChunkInfoCollection chunk_infos; }; + +namespace 
DeduplicationToken +{ + class TokenInfo : public ChunkInfoCloneable + { + public: + TokenInfo() = default; + TokenInfo(const TokenInfo & other) = default; + + String getToken(bool enable_assert = true) const; + + bool empty() const { return parts.empty(); } + bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } + + void setInitialToken(String part); + void setUserToken(const String & token); + void setSourceBlockNumber(size_t sbn); + void setMaterializeViewID(const String & id); + void setMaterializeViewBlockNumber(size_t mvbn); + void reset(); + + private: + void addTokenPart(String part); + size_t getTotalSize() const; + + enum BuildingStage + { + INITIAL, + SOURCE_BLOCK_NUMBER, + MATERIALIZE_VIEW_ID, + MATERIALIZE_VIEW_BLOCK_NUMBER, + }; + + BuildingStage stage = INITIAL; + std::vector parts; + }; + + + class CheckTokenTransform : public ISimpleTransform + { + public: + CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(debug_) + , must_be_present(must_be_present_) + { + } + + String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String debug; + bool must_be_present = false; + }; + + + class AddTokenInfoTransform : public ISimpleTransform + { + public: + explicit AddTokenInfoTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared()); + } + }; + + + class SetInitialTokenTransform : public ISimpleTransform + { + public: + explicit SetInitialTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + + void 
transform(Chunk & chunk) override; + }; + + class ResetTokenTransform : public ISimpleTransform + { + public: + explicit ResetTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + + class SetUserTokenTransform : public ISimpleTransform + { + public: + SetUserTokenTransform(String user_token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , user_token(std::move(user_token_)) + { + } + + String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String user_token; + }; + + + class SetSourceBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetSourceBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + + + class SetMaterializeViewIDTransform : public ISimpleTransform + { + public: + SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , mv_id(std::move(mv_id_)) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + + void transform(Chunk & chunk) override; + + private: + String mv_id; + }; + + + class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number; + }; + +} } diff --git 
a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7464cb79ba6..1a29b8d8a2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -79,7 +79,7 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}", chunk.getNumRows()); + "transform {}, finished {}", chunk.getNumRows(), finished); if (!finished) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ccecfcf3333..0c1893e0f37 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -108,7 +108,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -119,6 +119,7 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; + bool disable_deduplication_for_children; struct State { @@ -219,6 +220,11 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + if (disable_deduplication_for_children) + { + insert_context->setSetting("insert_deduplicate", Field{false}); + } + // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) select_context->setSetting("parallelize_output_from_storages", Field{false}); @@ -330,16 +336,6 @@ std::optional generateViewChain( 
bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - out.addSource(std::make_shared("Before inner chain", !disable_deduplication_for_children, out.getInputHeader())); - - if (!disable_deduplication_for_children) - { - String addition_part = view_id.hasUUID() ? toString(view_id.uuid) : view_id.getFullNameNotQuoted(); - out.addSource(std::make_shared(fmt::format(":mv-{}", addition_part), out.getInputHeader())); - } - - out.addSource(std::make_shared("Before extend token", !disable_deduplication_for_children, out.getInputHeader())); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) { bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); @@ -351,7 +347,7 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,23 +390,15 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); - - // if (!disable_deduplication_for_children) - // { - // // out.addSource(std::make_shared(out.getInputHeader())); - // // out.addSource(std::make_shared(out.getInputHeader())); - - // out.addSource(std::make_shared(out.getInputHeader())); - // } + out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, 
out.getInputHeader())); auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); + storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); } return out; @@ -451,8 +439,6 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - /// If the "root" table deduplicates blocks, there are no need to make deduplication for children - /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks bool disable_deduplication_for_children = false; if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); @@ -563,6 +549,10 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -578,7 +568,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) { const auto & context = views_data.context; 
@@ -625,9 +615,18 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - //pipeline.addTransform(std::make_shared(pipeline.getHeader())); pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + + if (!disable_deduplication_for_children) + { + String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + else + { + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -720,17 +719,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_) + std::shared_ptr views_data_, + bool disable_deduplication_for_children_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) + , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos())); + state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); } diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 6ca4ec6e079..b86845d48e0 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,13 @@ bool StorageFileLog::streamToViews() auto 
new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + new_context, + false, + true, + true, + false); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2d29f87c556..4b0fa94e183 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -90,16 +90,20 @@ void MergeTreeSink::consume(Chunk & chunk) bool support_parallel_write = false; String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if (storage.getDeduplicationLog()) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", + storage.getStorageID().getNameForLogs()); + + if (token_info->tokenInitialized()) { block_dedup_token = token_info->getToken(); @@ -109,9 +113,6 @@ void MergeTreeSink::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); - LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -141,10 +142,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if 
(dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e855bb7d969..b03f3f88611 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -294,17 +294,21 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } String block_dedup_token; - std::shared_ptr dedub_token_info_for_children = nullptr; + auto token_info = chunk.getChunkInfos().get(); if constexpr (!async_insert) { - auto token_info = chunk.getChunkInfos().get(); - if (!token_info && !context->getSettingsRef().insert_deduplication_token.value.empty()) + if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DedupTokenBuilder has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info) + if (token_info->tokenInitialized()) { /// multiple blocks can be inserted within the same insert query /// an ordinal number is added to dedup token to generate a distinctive block id for each block @@ -316,8 +320,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { - dedub_token_info_for_children = std::make_shared(); - chunk.getChunkInfos().add(dedub_token_info_for_children); 
LOG_DEBUG(storage.log, "dedup token from hash is calculated"); } @@ -386,10 +388,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (dedub_token_info_for_children) + if (!token_info->tokenInitialized()) { chassert(temp_part.part); - dedub_token_info_for_children->addTokenPart(":block_hash-" + temp_part.part->getPartBlockIDHash()); + token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -444,8 +446,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) /// value for `last_block_is_duplicate`, which is possible only after the part is committed. /// Othervide we can delay commit. /// TODO: we can also delay commit if there is no MVs. - if (!settings.deduplicate_blocks_in_dependent_materialized_views) - finishDelayedChunk(zookeeper); + // if (!settings.deduplicate_blocks_in_dependent_materialized_views) + // finishDelayedChunk(zookeeper); ++num_blocks_processed; } @@ -456,8 +458,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; - last_block_is_duplicate = false; - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -470,8 +470,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; - last_block_is_duplicate = last_block_is_duplicate || deduplicated; - /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? 
ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index e460804d7f1..7d025361717 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -59,16 +59,6 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); - /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } - struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -126,7 +116,6 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; - bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 335b55f05c8..ae82b9c0463 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,7 +10,8 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 +2023-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 
f206f0d7775..06fe156500d 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -56,6 +56,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference index 9b4738ce805..641735d1bb6 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 11: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -555,47 +601,93 @@ OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even -count 1 -EXPECTED_TO_FAIL +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 10 table_when_b_even count 5 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 10 table_when_b_even count 10 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 10 table_when_b_even -count 1 +count 5 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 10 table_when_b_even -count 1 +count 10 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference index 4411bdecea8..06f30793670 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference @@ -121,47 +121,93 @@ OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -197,9 +243,16 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -250,9 +303,16 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -303,9 +363,16 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -356,9 +423,16 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -527,47 +601,93 @@ OK Test case 
40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 10 -EXPECTED_TO_FAIL +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined -count 9 -EXPECTED_TO_FAIL +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_a_b -count 1 +count 5 table_when_b_even_and_joined count 47 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False table_a_b -count 1 +count 5 table_when_b_even_and_joined count 45 -EXPECTED_TO_FAIL +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_a_b count 5 table_when_b_even_and_joined -count 10 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b count 5 
table_when_b_even_and_joined -count 9 +count 45 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_a_b @@ -603,9 +723,16 @@ Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -656,9 +783,16 @@ Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -709,9 +843,16 @@ Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -762,9 +903,16 @@ Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 
47 +0 +0 +FIXED Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1055,9 +1203,16 @@ Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1108,9 +1263,16 @@ Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1161,9 +1323,16 @@ Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1214,9 +1383,16 @@ Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1507,9 +1683,16 @@ Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1560,9 +1743,16 @@ Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1613,9 +1803,16 @@ Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_a_b @@ -1666,9 +1863,16 @@ Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_ table_a_b count 5 table_when_b_even_and_joined -count 14 +count 47 0 -EXPECTED_TO_FAIL +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +FIXED Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_a_b diff --git 
a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference index a56f7deb744..4d517948a25 100644 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference @@ -88,36 +88,70 @@ table_dst count 32 OK Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 
+table_dst count 6 +0 +0 +FIXED Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -143,19 +177,25 @@ OK Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -181,19 +221,25 @@ OK Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 
+FIXED + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -219,19 +265,25 @@ OK Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -257,19 +309,25 @@ OK Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 29: insert_method=InsertSelect 
engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -382,36 +440,70 @@ table_dst count 32 OK Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -EXPECTED_TO_FAIL +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +FIXED Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 1 +table_src count 8 table_dst count 6 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +FIXED Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 +table_src count 8 table_dst count 16 -EXPECTED_TO_FAIL +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +FIXED Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True table_src count 8 -table_dst count 2 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 6 +0 +0 +FIXED Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False table_src count 8 -table_dst count 2 +table_dst count 16 0 -EXPECTED_TO_FAIL +0 +table_src count 16 +table_dst count 16 +0 +0 +FIXED Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -437,19 +529,25 @@ OK Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -475,19 +573,25 @@ OK Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -513,19 +617,25 @@ OK Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -551,19 +661,25 @@ OK Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -765,19 +881,25 @@ OK Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -803,19 +925,25 @@ OK Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -841,19 +969,25 @@ OK Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -879,19 +1013,25 @@ OK Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False 
-table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1093,19 +1233,25 @@ OK Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1131,19 +1277,25 @@ OK Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False 
-table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1169,19 +1321,25 @@ OK Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +table_src count 8 +table_dst count 6 +0 +0 +FIXED Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False table_src count 1 -table_dst count 1 +table_dst count 2 0 0 table_src count 1 -table_dst count 1 +table_dst count 2 0 -EXPECTED_TO_FAIL +0 +FIXED Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 @@ -1207,19 +1365,25 @@ OK Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True table_src count 8 -table_dst count 4 -0 -EXPECTED_TO_FAIL - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False -table_src count 8 -table_dst count 1 +table_dst count 6 0 0 table_src count 16 -table_dst count 1 +table_dst count 6 0 -EXPECTED_TO_FAIL +0 +FIXED + +Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +FIXED Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True table_src count 8 From 55ff6446b5035588eb6985e6fa2291ca444f0a00 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 7 May 2024 12:20:31 +0200 Subject: [PATCH 058/273] adjust rebase --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +++++-- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..c55bcd08573 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -191,7 +191,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) { std::vector to_written = squash(std::move(chunk_)); @@ -217,7 +217,10 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. 
if (!chunks.empty()) - consume({}); + { + Chunk empty; + consume(empty); + } } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..be425208357 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -34,7 +34,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; From 7a8f6b120699a9c4baf7a465ed21fa0aa66ddda5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 16:26:15 +0200 Subject: [PATCH 059/273] fix window view and other tests --- .../Sources/SourceFromSingleChunk.cpp | 4 +- .../Transforms/NumberBlocksTransform.cpp | 22 ++++++++-- .../Transforms/NumberBlocksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 8 ++-- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 4 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 16 ++++++-- src/Storages/WindowView/StorageWindowView.cpp | 41 +++++++++++++++---- src/Storages/WindowView/StorageWindowView.h | 2 +- .../03035_max_insert_threads_support.sh | 2 +- 11 files changed, 78 insertions(+), 29 deletions(-) diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index fb888c104c4..9abe0504d10 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,7 +5,9 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) +{ +} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : 
ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 19ebf94a27a..387d1ceb8e0 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -12,6 +12,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DeduplicationToken { @@ -101,9 +107,17 @@ void CheckTokenTransform::transform(Chunk & chunk) void SetInitialTokenTransform::transform(Chunk & chunk) { - auto token_builder = chunk.getChunkInfos().get(); - chassert(token_builder); - if (token_builder->tokenInitialized()) + auto token_info = chunk.getChunkInfos().get(); + + LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); + + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + + chassert(token_info); + if (!token_info || token_info->tokenInitialized()) return; SipHash hash; @@ -111,7 +125,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) colunm->updateHashFast(hash); const auto hash_value = hash.get128(); - token_builder->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 46b62029c21..6978fe5e6b6 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -166,7 +166,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp 
b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 0c1893e0f37..2e6baea7c26 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,8 +24,8 @@ #include #include #include -#include "Processors/Chunk.h" -#include "Processors/Transforms/NumberBlocksTransform.h" +#include +#include #include #include @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.getColumns()), context); + live_view.writeBlock(getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.getColumns()), context); + window_view, getHeader(), chunk, context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c3aacfd67d3..f6008347425 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -330,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +363,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(block)); + pipes.emplace_back(std::make_shared(header, chunk.clone())); 
auto creator = [&](const StorageID & blocks_id_global) { diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 91daac32c7b..fce5bad6240 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & block, ContextPtr context); + void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); void refresh(); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4b0fa94e183..c252d95a5e9 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,12 +95,12 @@ void MergeTreeSink::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in MergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); if (token_info->tokenInitialized()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b03f3f88611..41fdb86f3bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -299,12 +299,12 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", 
storage.getStorageID().getNameForLogs()); if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "DedupTokenBuilder has to be initialized with user token for table: {}", + "TokenInfo has to be initialized with user token for table: {}", storage.getStorageID().getNameForLogs()); @@ -1174,8 +1174,16 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - auto zookeeper = storage.getZooKeeper(); - finishDelayedChunk(std::make_shared(zookeeper)); + const auto & settings = context->getSettingsRef(); + + ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + storage.getZooKeeper(), + "ReplicatedMergeTreeSink::onFinish", + log); + + finishDelayedChunk(zookeeper); } template diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e0f3b437af7..b81ca34c427 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -1415,22 +1416,25 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & block, ContextPtr local_context) + StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) + if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); 
- const auto & window_column = block.getByName(window_view.timestamp_column_name); + const auto & window_column = header.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); + auto chunk_infos = chunk.getChunkInfos(); + chunk.setChunkInfos({}); + + Pipe pipe(std::make_shared(header.cloneEmpty(), std::move(chunk))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1475,10 +1479,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header) + pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header, std::make_shared(filter_expression), + header_, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1533,6 +1537,17 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(std::move(chunk_infos), stream_header); + }); + + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1546,7 +1561,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = 
*block.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1572,6 +1587,11 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + }); + auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1588,9 +1608,14 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); + builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Before out", true, stream_header); + }); + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f79867df424..56a21279b86 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr 
context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index 1e6bfb414d8..cedb651a430 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o MaterializingTransform | wc -l +" | grep -o StorageFileSink | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 4fa59ca49dc7536dbe5b10cbe1f56cd411415aa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 17 May 2024 17:42:18 +0200 Subject: [PATCH 060/273] adjust style --- src/Processors/Transforms/NumberBlocksTransform.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 6978fe5e6b6..610c219dfa2 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -6,11 +6,6 @@ #include -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { class RestoreChunkInfosTransform : public ISimpleTransform From ae124bf0b36958be0f1ef492272cd85664e50eb7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 May 2024 17:07:31 +0200 Subject: [PATCH 061/273] fix tests for liveview windowview --- .../Transforms/NumberBlocksTransform.cpp | 64 +++++++++++-------- .../Transforms/NumberBlocksTransform.h | 37 ++++++----- .../Transforms/buildPushingToViewsChain.cpp | 8 +-- src/Storages/LiveView/StorageLiveView.cpp | 20 +++++- src/Storages/LiveView/StorageLiveView.h | 2 +- 
src/Storages/WindowView/StorageWindowView.cpp | 38 ++++++++--- src/Storages/WindowView/StorageWindowView.h | 2 +- ...view_and_deduplication_zookeeper.reference | 4 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- 9 files changed, 116 insertions(+), 61 deletions(-) diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 387d1ceb8e0..11054f652ff 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -23,7 +23,7 @@ namespace DeduplicationToken String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const { - chassert(stage == MATERIALIZE_VIEW_ID || !enable_assert); + chassert(stage == VIEW_ID || !enable_assert); String result; result.reserve(getTotalSize()); @@ -38,7 +38,7 @@ void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } void TokenInfo::setUserToken(const String & token) @@ -52,21 +52,21 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); addTokenPart(fmt::format(":source-number-{}", sbn)); - stage = MATERIALIZE_VIEW_ID; + stage = VIEW_ID; } -void TokenInfo::setMaterializeViewID(const String & id) +void TokenInfo::setViewID(const String & id) { - chassert(stage == MATERIALIZE_VIEW_ID); - addTokenPart(fmt::format(":mv-{}", id)); - stage = MATERIALIZE_VIEW_BLOCK_NUMBER; + chassert(stage == VIEW_ID); + addTokenPart(fmt::format(":view-id-{}", id)); + stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setMaterializeViewBlockNumber(size_t mvbn) +void TokenInfo::setViewBlockNumber(size_t mvbn) { - chassert(stage == MATERIALIZE_VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":mv-bn-{}", mvbn)); - stage = MATERIALIZE_VIEW_ID; + chassert(stage == VIEW_BLOCK_NUMBER); + addTokenPart(fmt::format(":view-block-{}", mvbn)); + stage = VIEW_ID; } void 
TokenInfo::reset() @@ -116,8 +116,7 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - chassert(token_info); - if (!token_info || token_info->tokenInitialized()) + if (token_info->tokenInitialized()) return; SipHash hash; @@ -131,39 +130,52 @@ void SetInitialTokenTransform::transform(Chunk & chunk) void SetUserTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); token_info->setUserToken(user_token); } void SetSourceBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(!token_info->tokenInitialized()); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); token_info->setSourceBlockNumber(block_number++); } -void SetMaterializeViewIDTransform::transform(Chunk & chunk) +void SetViewIDTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewID(mv_id); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewIDTransform"); + token_info->setViewID(view_id); } -void SetMaterializeViewBlockNumberTransform::transform(Chunk & chunk) +void SetViewBlockNumberTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); - chassert(token_info->tokenInitialized()); - token_info->setMaterializeViewBlockNumber(block_number++); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed 
chunk in SetViewBlockNumberTransform"); + token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - chassert(token_info); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ResetTokenTransform"); + + LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index 610c219dfa2..b4f61eb887c 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -13,14 +13,21 @@ namespace DB public: RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) - , chunk_infos(chunk_infos_) + , chunk_infos(std::move(chunk_infos_)) { + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", + chunk_infos.size(), chunk_infos.debug()); } String getName() const override { return "RestoreChunkInfosTransform"; } void transform(Chunk & chunk) override { + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -45,8 +52,8 @@ namespace DeduplicationToken void setInitialToken(String part); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); - void setMaterializeViewID(const String & id); - void setMaterializeViewBlockNumber(size_t mvbn); + void setViewID(const String & id); + void setViewBlockNumber(size_t mvbn); void reset(); private: @@ -57,8 +64,8 @@ namespace DeduplicationToken { INITIAL, SOURCE_BLOCK_NUMBER, - 
MATERIALIZE_VIEW_ID, - MATERIALIZE_VIEW_BLOCK_NUMBER, + VIEW_ID, + VIEW_BLOCK_NUMBER, }; BuildingStage stage = INITIAL; @@ -71,7 +78,7 @@ namespace DeduplicationToken public: CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) : ISimpleTransform(header_, header_, true) - , debug(debug_) + , debug(std::move(debug_)) , must_be_present(must_be_present_) { } @@ -165,38 +172,38 @@ namespace DeduplicationToken }; - class SetMaterializeViewIDTransform : public ISimpleTransform + class SetViewIDTransform : public ISimpleTransform { public: - SetMaterializeViewIDTransform(String mv_id_, const Block & header_) + SetViewIDTransform(String view_id_, const Block & header_) : ISimpleTransform(header_, header_, true) - , mv_id(std::move(mv_id_)) + , view_id(std::move(view_id_)) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewIDTransform"; } + String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } void transform(Chunk & chunk) override; private: - String mv_id; + String view_id; }; - class SetMaterializeViewBlockNumberTransform : public ISimpleTransform + class SetViewBlockNumberTransform : public ISimpleTransform { public: - explicit SetMaterializeViewBlockNumberTransform(const Block & header_) + explicit SetViewBlockNumberTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetMaterializeViewBlockNumberTransform"; } + String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } void transform(Chunk & chunk) override; private: - size_t block_number; + size_t block_number = 0; }; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 2e6baea7c26..47ac1f3baed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ 
-620,8 +620,8 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat if (!disable_deduplication_for_children) { String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); } else { @@ -766,7 +766,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader(), chunk, context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -790,7 +790,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader(), chunk, context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f6008347425..b9d29a90f56 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -27,6 +27,7 @@ limitations under the License. 
*/ #include #include #include +#include "Processors/Transforms/NumberBlocksTransform.h" #include #include @@ -330,7 +331,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr local_context) +void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -363,7 +364,7 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr if (!is_block_processed) { Pipes pipes; - pipes.emplace_back(std::make_shared(header, chunk.clone())); + pipes.emplace_back(std::make_shared(block)); auto creator = [&](const StorageID & blocks_id_global) { @@ -407,6 +408,21 @@ void StorageLiveView::writeBlock(const Block & header, Chunk & chunk, ContextPtr builder = interpreter.buildQueryPipeline(); } + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(chunk_infos.clone(), cur_header); + }); + + String live_view_id = live_view.getStorageID().hasUUID() ? 
toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(live_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fce5bad6240..12d8e898347 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & header, Chunk & chunk, ContextPtr context); + void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); void refresh(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b81ca34c427..738de4b07ed 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1416,25 +1416,27 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr local_context) + StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", + block.rows(), + chunk_infos.size(), chunk_infos.debug(), + window_view.timestamp_column_name); + window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); - if (!window_view.is_proctime && window_view.max_watermark == 0 && chunk.getNumRows() > 0) + if 
(!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0) { std::lock_guard lock(window_view.fire_signal_mutex); - const auto & window_column = header.getByName(window_view.timestamp_column_name); + const auto & window_column = block.getByName(window_view.timestamp_column_name); const ColumnUInt32::Container & window_end_data = static_cast(*window_column.column).getData(); UInt32 first_record_timestamp = window_end_data[0]; window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - auto chunk_infos = chunk.getChunkInfos(); - chunk.setChunkInfos({}); - - Pipe pipe(std::make_shared(header.cloneEmpty(), std::move(chunk))); + Pipe pipe(std::make_shared(block)); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1465,6 +1467,9 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", + lateness_bound, window_view.is_proctime); + if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp >= lateness_bound { auto filter_function = makeASTFunction( @@ -1540,7 +1545,18 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared(std::move(chunk_infos), stream_header); + // Can't move chunk_infos here, that function could be called several times + return std::make_shared(chunk_infos.clone(), stream_header); + }); + + String window_view_id = window_view.getStorageID().hasUUID() ? 
toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(window_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); }); builder.addSimpleTransform([&](const Block & stream_header) @@ -1548,6 +1564,7 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); }); + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1561,7 +1578,7 @@ void StorageWindowView::writeIntoWindowView( UInt32 block_max_timestamp = 0; if (window_view.is_watermark_bounded || window_view.allowed_lateness) { - const auto & timestamp_column = *header.getByName(window_view.timestamp_column_name).column; + const auto & timestamp_column = *block.getByName(window_view.timestamp_column_name).column; const auto & timestamp_data = typeid_cast(timestamp_column).getData(); for (const auto & timestamp : timestamp_data) block_max_timestamp = std::max(timestamp, block_max_timestamp); @@ -1569,6 +1586,9 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); + + LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", + block_max_timestamp); } UInt32 lateness_upper_bound = 0; diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 56a21279b86..14ac65091d3 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & header, Chunk & chunk, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, Block && block, 
Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 741591b0dd4..9c9281dc7e4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,8 +1,8 @@ 2 3 -2 -2 +3 +3 1 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 0a41581025a..51e6a513608 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert is deduplicated even for MV without_deduplication_mv +-- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; From 7fe4e675707c1e27edf2a06f3779768a483e6c21 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 21 May 2024 19:02:50 +0200 Subject: [PATCH 062/273] accept test 02457_insert_select_progress_http --- src/Interpreters/InterpreterInsertQuery.cpp | 56 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 37 ++++++------ src/Interpreters/SquashingTransform.h | 7 +-- .../Transforms/SquashingChunksTransform.cpp | 13 +++-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 --- 5 files changed, 54 insertions(+), 66 deletions(-) diff --git 
a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 0f3df3752cb..339f68258dc 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -545,6 +545,34 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & } } + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -595,34 +623,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.resize(presink_chains.size()); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - 
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - for (auto & chain : presink_chains) pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(presink_chains)); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index cf4f2060414..8a902add9a5 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,6 @@ #include +#include namespace DB { @@ -16,23 +17,6 @@ SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_b } SquashingTransform::SquashResult SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -SquashingTransform::SquashResult SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input_block) { /// End of input stream. 
if (!input_block) @@ -66,7 +50,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input return SquashResult{std::move(to_return), true}; } - append(std::move(input_block)); + append(std::move(input_block)); if (isEnoughSize(accumulated_block)) { Block to_return; @@ -79,8 +63,7 @@ SquashingTransform::SquashResult SquashingTransform::addImpl(ReferenceType input } -template -void SquashingTransform::append(ReferenceType input_block) +void SquashingTransform::append(Block && input_block) { if (!accumulated_block) { @@ -88,6 +71,11 @@ void SquashingTransform::append(ReferenceType input_block) return; } + LOG_DEBUG(getLogger("SquashingTransform"), + "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + input_block.rows(), input_block.bytes(), input_block.columns(), + accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -96,6 +84,15 @@ void SquashingTransform::append(ReferenceType input_block) { const auto source_column = input_block.getByPosition(i).column; + const auto acc_column = accumulated_block.getByPosition(i).column; + + LOG_DEBUG(getLogger("SquashingTransform"), + "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + i, source_column->getName(), + acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index f1eba537338..fff55a760db 100644 --- a/src/Interpreters/SquashingTransform.h +++ 
b/src/Interpreters/SquashingTransform.h @@ -34,7 +34,6 @@ public: * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. */ SquashResult add(Block && block); - SquashResult add(const Block & block); private: size_t min_block_size_rows; @@ -42,11 +41,7 @@ private: Block accumulated_block; - template - SquashResult addImpl(ReferenceType block); - - template - void append(ReferenceType block); + void append(Block && block); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 1a29b8d8a2d..ea0d63a2ed7 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -21,7 +21,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) "onConsume {}", chunk.getNumRows()); if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -33,7 +33,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } @@ -79,12 +79,15 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::transform(Chunk & chunk) { LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform {}, finished {}", chunk.getNumRows(), finished); + "transform rows {}, size {}, columns {}, infos: {}/{}, finished {}", + chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + finished); if (!finished) { if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos(); 
+ cur_chunkinfos = chunk.getChunkInfos().clone(); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -96,7 +99,7 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) if (cur_chunkinfos.empty() && result.input_block_delayed) { - cur_chunkinfos = chunk.getChunkInfos(); + cur_chunkinfos = chunk.getChunkInfos().clone(); } } else diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 41fdb86f3bd..11c64c97cb7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -442,13 +442,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); - /// If deduplicated data should not be inserted into MV, we need to set proper - /// value for `last_block_is_duplicate`, which is possible only after the part is committed. - /// Othervide we can delay commit. - /// TODO: we can also delay commit if there is no MVs. 
- // if (!settings.deduplicate_blocks_in_dependent_materialized_views) - // finishDelayedChunk(zookeeper); - ++num_blocks_processed; } From ee3385fbc00151427a209398a881f882aef6512b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 28 May 2024 18:56:41 +0200 Subject: [PATCH 063/273] adjust after merge with master --- src/Storages/ObjectStorage/StorageObjectStorageSink.cpp | 4 ++-- src/Storages/ObjectStorage/StorageObjectStorageSink.h | 2 +- src/Storages/StorageAzureBlob.cpp | 2 +- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 0a3cf19a590..9718b329414 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk chunk) +void StorageObjectStorageSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..1ec52889f0a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 5dc407bf86d..a1ce991b5c9 100644 --- 
a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE #include diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 7d0127bc1c4..a8d407e8d79 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -231,7 +231,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - self.rfile.read(50) + request_handler.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 1b7db4195c1f0e5be62ab7a3784deebc2481f666 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 02:18:50 +0200 Subject: [PATCH 064/273] work with tests --- src/Interpreters/InterpreterInsertQuery.cpp | 13 +- src/Interpreters/SquashingTransform.cpp | 8 +- src/Processors/ISimpleTransform.h | 2 - .../Transforms/CountingTransform.cpp | 3 +- .../Transforms/ExpressionTransform.cpp | 3 - .../Transforms/MaterializingTransform.cpp | 3 - .../Transforms/SquashingChunksTransform.cpp | 52 +- .../Transforms/buildPushingToViewsChain.cpp | 6 +- src/Storages/MergeTree/MergeTreeSink.cpp | 5 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 +- src/Storages/StorageAzureBlob.cpp | 1638 ------------ src/Storages/StorageS3.cpp | 2310 ----------------- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 + ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- 17 files changed, 82 insertions(+), 3996 deletions(-) delete mode 100644 src/Storages/StorageAzureBlob.cpp delete mode 100644 src/Storages/StorageS3.cpp diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index cd68cbc41c0..249c69b51b9 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp 
+++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -574,6 +574,8 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return counting; }); + size_t num_select_threads = pipeline.getNumThreads(); + pipeline.resize(1); if (shouldAddSquashingFroStorage(table)) @@ -616,8 +618,18 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & /// Otherwise ResizeProcessor them down to 1 stream. size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + if (!settings.parallel_view_processing) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + if (table->isView() || !views.empty()) + sink_streams_size = 1; + } + auto [presink_chains, sink_chains] = buildPreAndSyncChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -636,7 +648,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.parallel_view_processing) { - size_t num_select_threads = pipeline.getNumThreads(); /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. 
if (pipeline.getNumThreads() > num_select_threads) pipeline.setMaxThreads(num_select_threads); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 8a902add9a5..30c801aaaff 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -82,9 +83,8 @@ void SquashingTransform::append(Block && input_block) { for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) { - const auto source_column = input_block.getByPosition(i).column; - - const auto acc_column = accumulated_block.getByPosition(i).column; + const auto source_column = std::move(input_block.getByPosition(i).column); + auto acc_column = std::move(accumulated_block.getByPosition(i).column); LOG_DEBUG(getLogger("SquashingTransform"), "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", @@ -93,7 +93,7 @@ void SquashingTransform::append(Block && input_block) source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + auto mutable_column = IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); } diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index 3862ea76dbb..a47e0e49121 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -31,8 +31,6 @@ protected: virtual void transform(Chunk & input_chunk, Chunk & output_chunk) { - LOG_DEBUG(getLogger("ISimpleTransform"), - "transform {}", input_chunk.getNumRows()); transform(input_chunk); output_chunk.swap(input_chunk); } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 7329a196f8a..c138eed69de 100644 --- 
a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "IO/Progress.h" namespace ProfileEvents @@ -18,7 +19,7 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { LOG_DEBUG(getLogger("CountingTransform"), - "onConsume {}", chunk.getNumRows()); + "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index db5d2b0c49c..73d41828bc0 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -21,9 +21,6 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("ExpressionTransform"), - "transform {}", chunk.getNumRows()); - size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 8366472f876..4a7f5187c75 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -12,9 +12,6 @@ MaterializingTransform::MaterializingTransform(const Block & header) void MaterializingTransform::transform(Chunk & chunk) { - LOG_DEBUG(getLogger("MaterializingTransform"), - "transform {}", chunk.getNumRows()); - auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 22171d97b6e..2ee13c05b95 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ 
b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -20,20 +20,32 @@ void SquashingChunksTransform::onConsume(Chunk chunk) LOG_DEBUG(getLogger("SquashingChunksTransform"), "onConsume {}", chunk.getNumRows()); - if (cur_chunkinfos.empty()) - cur_chunkinfos = chunk.getChunkInfos().clone(); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); + if (result.block) { cur_chunk.setColumns(result.block.getColumns(), result.block.rows()); - cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); - cur_chunkinfos = {}; - } + if (result.input_block_delayed) + { + cur_chunk.setChunkInfos(std::move(cur_chunkinfos)); + cur_chunkinfos = std::move(chunk.getChunkInfos()); + } + else + { + cur_chunk.setChunkInfos(chunk.getChunkInfos()); + cur_chunkinfos = {}; + } - if (cur_chunkinfos.empty() && result.input_block_delayed) + LOG_DEBUG(getLogger("SquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + } + else { - cur_chunkinfos = chunk.getChunkInfos().clone(); + assert(!result.input_block_delayed); + cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -85,15 +97,29 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); - if (result.input_block_delayed) + if (result.block) { - squashed_chunk.setChunkInfos(std::move(squashed_info)); - squashed_info = std::move(chunk.getChunkInfos()); + squashed_chunk.setColumns(result.block.getColumns(), result.block.rows()); + if (result.input_block_delayed) + { + squashed_chunk.setChunkInfos(std::move(squashed_info)); + squashed_info = std::move(chunk.getChunkInfos()); + } + 
else + { + squashed_chunk.setChunkInfos(chunk.getChunkInfos()); + squashed_info = {}; + } + + LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + "got result rows {}, size {}, columns {}, infos: {}/{}", + squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - squashed_chunk.setChunkInfos(std::move(chunk.getChunkInfos())); + assert(!result.input_block_delayed); + squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 72897a06c44..d44796610ed 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "Core/Field.h" #include #include @@ -223,6 +224,7 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); + insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -731,8 +733,8 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data, chunk.getChunkInfos(), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index c252d95a5e9..0953cdc5d72 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -100,8 +100,9 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); + "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 11c64c97cb7..62d30764ca8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,9 +304,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}", - storage.getStorageID().getNameForLogs()); - + "TokenInfo has to be initialized with user token for table: {} user dedup token {}", + storage.getStorageID().getNameForLogs(), + context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index a1ce991b5c9..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1638 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include 
-#include -#include - -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - "storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - 
configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = 
checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().exists(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.blob_path).value_or("auto"); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPtr & local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. 
Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - 
} - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - - if (attempt_to_create_container) - { - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = configuration.connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, workload_identity_credential); - } - 
else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(blob_service_client,configuration.container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have 
'?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - -bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - const ContextPtr & context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(object_storage.get(), configuration, 
format_settings, context); - else - columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); - storage_metadata.setColumns(columns); - } - else - { - if (configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(object_storage.get(), configuration, format_settings, context).second; - - /// We don't allow special columns in File storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = 
context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, AzureObjectStorage * object_storage, const String & path, size_t sequence_number) - { - if (context->getSettingsRef().azure_truncate_on_insert || !object_storage->exists(StoredObject(path))) - return std::nullopt; - - if (context->getSettingsRef().azure_create_new_file_on_insert) - { - auto pos = path.find_first_of('.'); - String new_path; - do - { - new_path = path.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : path.substr(pos)); - ++sequence_number; - } - while (object_storage->exists(StoredObject(new_path))); - - return new_path; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object with key {} already exists. 
" - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - path); - } -} - -class PartitionedStorageAzureBlobSink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(getContext(), object_storage, partition_key, 1)) - partition_key = *new_path; - - return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - 
ReadFromAzureBlob( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && 
local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->getVirtualsList(), context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - 
processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - if (configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - auto path = configuration.blobs_paths.front(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(path, configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } - else - { - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(local_context, object_storage.get(), path, configuration.blobs_paths.size())) - { - configuration.blobs_paths.push_back(*new_path); - path = *new_path; - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - path); - } -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return 
FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? 
true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr 
& context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const 
String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , file_iterator(file_iterator_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? 
tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - std::optional format_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key : read_keys) - { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(key.relative_path)) - { - format = format_from_path; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in AzureBlobStorage. You can specify table structure manually"); - } - - return {nullptr, std::nullopt, format}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration. - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it).relative_path)) - { - format = format_from_file_name; - break; - } - } - } - /// Check new files in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache, format}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt, format}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, *format, 
format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max); - } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_azure) - return std::nullopt; - - auto & schema_cache = StorageAzureBlob::getSchemaCache(context); - for (auto it = 
begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = host_and_bucket + '/' + it->relative_path; - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromDataImpl( - std::optional format, - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, format, configuration, format_settings, read_keys, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -std::pair StorageAzureBlob::getTableStructureAndFormatFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, object_storage, configuration, format_settings, ctx); -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - DB::AzureObjectStorage * object_storage, - const DB::StorageAzureBlob::Configuration & configuration, - const 
std::optional & format_settings, - const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration.format, object_storage, configuration, format_settings, ctx).first; -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 7975b42ac02..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,2310 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "Common/logger_useful.h" -#include "IO/CompressionMethod.h" -#include "IO/ReadBuffer.h" -#include "Interpreters/Context_fwd.h" -#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - 
-#include -#include -#include -#include - -#include - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#include -#pragma clang diagnostic pop - -namespace fs = std::filesystem; - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int CANNOT_DETECT_FORMAT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; - extern const int NO_ELEMENTS_IN_CONFIG; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - 
ReadFromStorageS3Step( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , column_names(column_names_) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , query_configuration(storage.getConfigurationCopy()) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration.update(context); - virtual_columns = storage.getVirtualsList(); - } - -private: - Names column_names; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate_, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , predicate(predicate_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , list_objects_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 
CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - expanded_keys = expandSelectionGlob(globbed_uri.key); - expanded_keys_iter = expanded_keys.begin(); - - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - bool hasMore() - { - if (buffer.empty()) - return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); - else - return true; - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - void fillBufferForKey(const std::string & uri_key) - { - is_finished_for_key = false; - const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == uri_key.size()) - { - buffer.clear(); - buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); - buffer_iter = buffer.begin(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - is_finished_for_key = true; - return; - } - - request = {}; - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(uri_key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", uri_key, matcher->error()); - - recursive = globbed_uri.key == "/**"; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr nextAssumeLocked() - { - do - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - try - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - } - catch (...) - { - /// if no such file AND there was no `{}` glob -- this is an exception - /// otherwise ignore it, this is acceptable - if (expanded_keys.size() == 1) - throw; - continue; - } - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished_for_key) - { - if (expanded_keys_iter != expanded_keys.end()) - { - fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - continue; - } - else - return {}; - } - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) 
- { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. - is_finished_for_key = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - } while (true); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished_for_key = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished_for_key) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. 
- if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::vector expanded_keys; - std::vector::iterator expanded_keys_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - const ActionsDAG::Node * predicate; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished_for_key{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunnerUnsafe list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - const ContextPtr & context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - if (pimpl->hasMore()) - { - /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); - /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - /// as it would lead to serious slow down of the execution, since objects are going - /// to be fetched sequentially rather than in-parallel with up to 
times. - return std::numeric_limits::max(); - } - else - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t 
StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get())); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - while (current_index < buffer.size()) - { - if (const auto & key_info = buffer[current_index]; key_info && !key_info->key.empty()) - return buffer[current_index]; - - current_index = index.fetch_add(1, std::memory_order_relaxed); - } - - return nullptr; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - - -StorageS3Source::ArchiveIterator::ArchiveIterator( - std::unique_ptr basic_iterator_, - const std::string & archive_pattern_, - std::shared_ptr client_, - const String & bucket_, - const String & version_id_, - const S3Settings::RequestSettings & request_settings_, - ContextPtr context_, - KeysWithInfo * read_keys_) - : WithContext(context_) - , basic_iterator(std::move(basic_iterator_)) - , basic_key_with_info_ptr(nullptr) - , client(client_) - , bucket(bucket_) - , version_id(version_id_) - , request_settings(request_settings_) - , read_keys(read_keys_) -{ - if 
(archive_pattern_.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern_)); - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error()); - filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }}; - } - else - { - path_in_archive = archive_pattern_; - } -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t) -{ - if (!path_in_archive.empty()) - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - bool file_exists = archive_reader->fileExists(path_in_archive); - if (file_exists) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } - else - { - std::unique_lock lock{take_next_mutex}; - while (true) - { - if (!file_enumerator) - { - basic_key_with_info_ptr = basic_iterator->next(); - if (!basic_key_with_info_ptr) - return {}; - refreshArchiveReader(); - file_enumerator = archive_reader->firstFile(); - if (!file_enumerator) - { - file_enumerator.reset(); - continue; - } - } - else if (!file_enumerator->nextFile()) - { - file_enumerator.reset(); - continue; - } - - String current_filename = file_enumerator->getFileName(); - bool satisfies = filter(current_filename); - if (satisfies) - { - KeyWithInfoPtr archive_key_with_info - = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader); - if (read_keys != nullptr) - read_keys->push_back(archive_key_with_info); - return archive_key_with_info; - } - } - } -} - -size_t 
StorageS3Source::ArchiveIterator::estimatedKeysCount() -{ - return basic_iterator->estimatedKeysCount(); -} - -void StorageS3Source::ArchiveIterator::refreshArchiveReader() -{ - if (basic_key_with_info_ptr) - { - if (!basic_key_with_info_ptr->info) - { - basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings); - } - archive_reader = createArchiveReader( - basic_key_with_info_ptr->key, - [key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]() - { return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); }, - basic_key_with_info_ptr->info.value().size); - } - else - { - archive_reader = nullptr; - } -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - const ContextPtr & context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , 
need_only_count(need_only_count_) - , create_reader_pool( - CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
- source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = CompressionMethod::None; - if (!key_with_info->path_in_archive.has_value()) - { - compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer( - key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings); - } - else - { - compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint); - read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr createS3ReadBuffer( - const String & key, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_settings = context->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = context->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - static LoggerPtr log = getLogger("StorageS3Source"); - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings); - } - - - return std::make_unique( - client_ptr, - bucket, - key, - version_id, - request_settings, - read_settings, - /*use_external_buffer*/ false, - /*offset_*/ 0, - /*read_until_position_*/ 0, - /*restricted_seek_*/ false, - object_size); -} - -std::unique_ptr createAsyncS3ReadBuffer( - const String & key, - const ReadSettings & read_settings, - size_t object_size, - std::shared_ptr context, - std::shared_ptr client_ptr, - const String & bucket, - const String & version_id, - const S3Settings::RequestSettings & request_settings) -{ - auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr - { - return std::make_unique( - client_ptr, - bucket, - object.remote_path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */ true, - /* offset */ 0, - /* read_until_position */ 0, - restricted_seek, - object_size); - }; - - auto modified_settings{read_settings}; - /// User's S3 object may change, don't cache it. 
- modified_settings.use_page_cache_for_disks_without_file_cache = false; - - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - "", - read_settings, - /* cache_log */ nullptr, - /* use_external_buffer */ true); - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - String file_name = reader.getFile(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? 
(&file_name) : nullptr); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getPath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket_with_key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional { return key_with_info.info->last_modification_time; }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - const ContextPtr & context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = 
context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunnerUnsafe(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -namespace -{ - -std::optional checkAndGetNewFileOnInsertIfNeeded( - const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) -{ - if (context->getSettingsRef().s3_truncate_on_insert - || !S3::objectExists( - *configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) - { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } while (S3::objectExists( - *configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. 
" - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); -} -} - - -class PartitionedStorageS3Sink : public PartitionedSink, WithContext -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - const ContextPtr & context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) - , WithContext(context_) - , format(format_) - , sample_block(sample_block_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */ 1)) - partition_key = *new_key; - - return std::make_shared( - format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key); - } - -private: - const String format; - const Block sample_block; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket 
name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - const ContextPtr & context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - if (configuration.format != "auto") - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - ColumnsDescription columns; - if (configuration.format == "auto") - std::tie(columns, configuration.format) = getTableStructureAndFormatFromData(configuration, format_settings, context_); - else - columns = getTableStructureFromData(configuration, format_settings, context_); - - storage_metadata.setColumns(columns); - } - else - { - if 
(configuration.format == "auto") - configuration.format = getTableStructureAndFormatFromData(configuration, format_settings, context_).second; - - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -static std::shared_ptr createFileIterator( - StorageS3::Configuration configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3Source::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared( - local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else - { - auto basic_iterator = [&]() -> std::unique_ptr - { - StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? 
nullptr : read_keys; - if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_unique( - *configuration.client, - configuration.url, - predicate, - virtual_columns, - local_context, - local_read_keys, - configuration.request_settings, - file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - return std::make_unique( - *configuration.client, - configuration.url.version_id, - keys, - configuration.url.bucket, - configuration.request_settings, - local_read_keys, - file_progress_callback); - } - }(); - if (configuration.url.archive_pattern.has_value()) - { - return std::make_shared( - std::move(basic_iterator), - configuration.url.archive_pattern.value(), - configuration.client, - configuration.url.bucket, - configuration.url.version_id, - configuration.request_settings, - local_context, - read_keys); - } - else - { - return basic_iterator; - } - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy()); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - 
ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - updateConfiguration(local_context); - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - local_context, - read_from_format_info.source_header, - *this, - std::move(read_from_format_info), - need_only_count, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - storage.getConfigurationCopy(), - storage.distributed_processing, - context, - predicate, - virtual_columns, - nullptr, - context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && query_configuration.withPartitionWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - { - /// The amount of keys (zero) was probably underestimated. 
We will keep one stream for this particular case. - num_streams = 1; - } - - const size_t max_threads = context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write( - const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - auto key = query_configuration.keys.front(); - - if (query_configuration.withGlobsIgnorePartitionWildcard()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto 
partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withPartitionWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } - else - { - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size())) - { - std::lock_guard lock{configuration_update_mutex}; - query_configuration.keys.push_back(*new_key); - configuration.keys.push_back(*new_key); - key = *new_key; - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - key); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(const ContextPtr & local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -StorageS3::Configuration StorageS3::getConfigurationCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -String StorageS3::getFormatCopy() const -{ - std::lock_guard lock(configuration_update_mutex); - return configuration.format; -} - -bool StorageS3::Configuration::update(const ContextPtr & context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = 
url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(const ContextPtr & context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - /// seems as we don't use it - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = local_settings.s3_connect_timeout_ms; - client_configuration.http_keep_alive_timeout = S3::DEFAULT_KEEP_ALIVE_TIMEOUT; - client_configuration.http_keep_alive_max_requests = S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), - }; - - auto credentials - = Aws::Auth::AWSCredentials(auth_settings.access_key_id, 
auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); -} - -bool StorageS3::Configuration::withGlobsIgnorePartitionWildcard() const -{ - if (!withPartitionWildcard()) - return withGlobs(); - - return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - 
configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds - = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method - = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 6 positional 
arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - else if (count == 6) - { - engine_args_to_idx - = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - 
configuration.auth_settings.access_key_id - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token - = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration - = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - { - if (configuration.url.archive_pattern.has_value()) - { - configuration.format = FormatFactory::instance() - .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) - .value_or("auto"); - } - else - { - configuration.format - = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); - } - } - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(configuration_.format, configuration_, format_settings_, ctx).first; -} - -std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); -} - -class ReadBufferIterator : public IReadBufferIterator, WithContext -{ -public: - 
ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - ContextPtr context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - Data next() override - { - if (first) - { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. - if (!format) - { - for (const auto & key_with_info : read_keys) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; - } - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. 
You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3Source::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - std::unique_ptr impl; - - if (!current_key_with_info->path_in_archive.has_value()) - { - impl = std::make_unique( - configuration.client, - configuration.url.bucket, - current_key_with_info->key, - configuration.url.version_id, - configuration.request_settings, - getContext()->getReadSettings()); - } - else - { - assert(current_key_with_info->archive_reader); - impl = current_key_with_info->archive_reader->readFile( - current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); - } - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return { - wrapReadBufferWithCompressionMethod( - std::move(impl), - current_key_with_info->path_in_archive.has_value() - ? 
chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method) - : chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), - zstd_window_log_max), - std::nullopt, - format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) - / configuration.url.bucket / current_key_with_info->getPath(); - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), - read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) { return host_and_bucket / elem->getPath(); }); - auto cache_keys = getKeysForSchemaCache(sources, 
*format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->getPath(); - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); - } - -private: - std::optional tryGetColumnsFromCache( - const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. 
- last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - String path = fs::path(configuration.url.bucket) / (*it)->getPath(); - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - - if (format) - { - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry fcreateor some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; -}; - -std::pair StorageS3::getTableStructureAndFormatFromDataImpl( - std::optional format, - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format, format_settings, ctx); - if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. 
- const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - registerStorageS3Impl("S3", factory); - registerStorageS3Impl("COSN", factory); - registerStorageS3Impl("OSS", factory); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} -} - -#endif diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..dadf2f35e6e 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and 
Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index ba42795333c..6287156daaf 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,6 +16,7 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS +SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index e0cc8f0ce63..2d9f236ada9 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent -18 9 9 9 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, 
insert_deduplication_token = yes, results consitent +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data +18 18 18 18 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 88d3165d060..6a155bcda46 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ 
select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index d2d19b76a1f..ba50902ebea 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; system flush logs; -- { echo } From 5f63abfd43e6946ff4f21d261f77e4eeb8b7d7c5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 14:31:11 +0200 Subject: [PATCH 065/273] work with tests --- .../Transforms/CountingTransform.cpp | 3 -- src/Processors/Transforms/CountingTransform.h | 2 -- .../Transforms/SquashingChunksTransform.cpp | 32 +++++++++---------- .../Transforms/buildPushingToViewsChain.cpp | 6 
++-- src/Storages/MergeTree/MergeTreeDataWriter.h | 1 - src/Storages/StorageLog.cpp | 1 + 6 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index c138eed69de..d39c6575292 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -18,9 +18,6 @@ namespace DB void CountingTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("CountingTransform"), - "onConsume rows {} bytes {}, progress rows {} bytes {}", chunk.getNumRows(), chunk.bytes(), progress.written_rows, progress.written_bytes); - if (quota) quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes()); diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index ab8d083fd05..4efcf147ac7 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -45,8 +45,6 @@ public: void onConsume(Chunk chunk) override; GenerateResult onGenerate() override { - LOG_DEBUG(getLogger("CountingTransform"), - "onGenerate {}", cur_chunk.getNumRows()); GenerateResult res; res.chunk = std::move(cur_chunk); return res; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 2ee13c05b95..531d264a25a 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,8 +17,8 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "onConsume {}", chunk.getNumRows()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "onConsume {}", chunk.getNumRows()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); 
@@ -37,14 +37,14 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunkinfos = {}; } - LOG_DEBUG(getLogger("SquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), + // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + assert(result.input_block_delayed); cur_chunkinfos = std::move(chunk.getChunkInfos()); } } @@ -90,10 +90,10 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "transform rows {}, size {}, columns {}, infos: {}/{}", - chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "transform rows {}, size {}, columns {}, infos: {}/{}", + // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), + // chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); @@ -111,14 +111,14 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_info = {}; } - LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - "got result rows {}, size {}, columns {}, infos: {}/{}", - squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); + // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), + // "got result rows {}, size {}, columns {}, infos: {}/{}", + 
// squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), + // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { - assert(!result.input_block_delayed); + chassert(result.input_block_delayed); squashed_info = std::move(chunk.getChunkInfos()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index d44796610ed..996fe3efdc5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,10 +552,8 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index a9a44813545..863c951d957 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -47,7 +47,6 @@ public: : data(data_) , log(getLogger(data.getLogName() + " (Writer)")) { - LOG_DEBUG(log, "MergeTreeDataWriter() called from:\n{}", StackTrace().toString()); } /** Split the block to blocks, each of them must be written as separate part. 
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 6ef16189335..8b1bf4637b4 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,7 @@ #include #include +#include "Common/logger_useful.h" #include #include #include From 62c764c2169cd5bed627bd670a5d93fa854c1fa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 29 May 2024 17:36:43 +0200 Subject: [PATCH 066/273] work with tests --- tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 06fe156500d..450d92476a9 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,7 +54,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 Now it is fixed. 
*/ From c25e9ecde35fad2b72f919fbdf54381fa184538f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 13:04:55 +0200 Subject: [PATCH 067/273] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 +++++ src/Processors/Transforms/NumberBlocksTransform.cpp | 8 +++++++- src/Processors/Transforms/NumberBlocksTransform.h | 2 ++ src/Processors/Transforms/buildPushingToViewsChain.cpp | 5 ++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..c166ec81af7 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -16,6 +17,10 @@ void SinkToStorage::onConsume(Chunk chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); + + // Add comment here + DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); + cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/NumberBlocksTransform.cpp index 11054f652ff..d51fe67c868 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/NumberBlocksTransform.cpp @@ -105,7 +105,7 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::transform(Chunk & chunk) +void SetInitialTokenTransform::setInitialToken(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -127,6 +127,12 @@ void SetInitialTokenTransform::transform(Chunk & chunk) token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } + +void SetInitialTokenTransform::transform(Chunk & chunk) +{ + setInitialToken(chunk); +} + void SetUserTokenTransform::transform(Chunk & chunk) { auto 
token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/NumberBlocksTransform.h index b4f61eb887c..a2e48d9b548 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/NumberBlocksTransform.h @@ -121,6 +121,8 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } void transform(Chunk & chunk) override; + + static void setInitialToken(Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 996fe3efdc5..46ca109fe0f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -552,8 +552,11 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 59a97713b06f1bb2ffd24087114dbff5a0eecee8 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 30 May 2024 16:37:59 +0200 Subject: [PATCH 068/273] work with tests --- src/Processors/Sinks/SinkToStorage.cpp | 5 ----- src/Processors/Transforms/buildPushingToViewsChain.cpp | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index c166ec81af7..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB @@ -17,10 +16,6 @@ void SinkToStorage::onConsume(Chunk chunk) 
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk); - - // Add comment here - DeduplicationToken::SetInitialTokenTransform::setInitialToken(chunk); - cur_chunk = std::move(chunk); } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 9dc9531b7a1..7a32b6ff038 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -530,6 +530,8 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -552,11 +554,6 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From 5fe2249300d2d5951329a39a49322bbd99cce614 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 14:43:35 +0200 Subject: [PATCH 069/273] adjust tesy test_force_deduplication --- .../test_force_deduplication/test.py | 73 ++++++++++++++----- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 87b2c45bbc5..14c11bc8500 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,6 +29,8 @@ def get_counts(): def test_basic(start_cluster): + old_src, old_a, old_b, old_c = 0, 0, 0, 0 + node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree 
('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -39,6 +41,15 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) + + src, a, b, c = get_counts() + assert src == old_src + 1 + assert a == old_a + 2 + assert b == old_b + 2 + assert c == old_c + 2 + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -46,22 +57,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c - old_src, old_a, old_b, old_c = get_counts() - # number of rows in test_mv_a and test_mv_c depends on order of inserts into views - assert old_src == 11 - assert old_a in (1, 11) - assert old_b == 1 - assert old_c in (1, 11) - + # deduplication only for src table node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - # no changes because of deduplication in source table assert src == old_src - assert a == old_a - assert b == old_b - assert c == old_c + assert a == old_a + 10 + assert b == old_b + 10 + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. 
node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; @@ -69,11 +81,27 @@ def test_basic(start_cluster): """ ) src, a, b, c = get_counts() - assert src == 11 - assert a == old_a + 10 # first insert could be succesfull with disabled dedup - assert b == 11 + assert src == old_src + assert a == old_a + 10 + assert b == old_b + 10 assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all the tables + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == old_src + assert a == old_a + assert b == old_b + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated with pytest.raises(QueryRuntimeException): node.query( """ @@ -82,16 +110,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all tables, except test_mv_b. 
For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == 21 - assert a == old_a + 20 - assert b == 21 - assert c == old_c + 20 + assert src == old_src + assert a == old_a + assert b == old_b + 10 + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c From c3f72f0cf9180397359136941c7247a812576c61 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 14:44:55 +0200 Subject: [PATCH 070/273] revert changes at helpers/s3_mocks/broken_s3.py --- tests/integration/helpers/s3_mocks/broken_s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 566d4739eb0..686abc76bdf 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -246,7 +246,7 @@ class _ServerRuntime: class BrokenPipeAction: def inject_error(self, request_handler): # partial read - request_handler.rfile.read(50) + self.rfile.read(50) time.sleep(1) request_handler.connection.setsockopt( From 2b3e1920ebfe32e99c2833acce357076a7480e40 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 15:22:09 +0200 Subject: [PATCH 071/273] break tests to meet the timeout --- ...uplication_insert_several_blocks.reference | 1922 ----------------- ...008_deduplication_insert_several_blocks.sh | 97 - ...ert_several_blocks_nonreplicated.reference | 962 +++++++++ ...ion_insert_several_blocks_nonreplicated.sh | 58 + ...insert_several_blocks_replicated.reference | 962 +++++++++ ...cation_insert_several_blocks_replicated.sh | 58 + ...tion_mv_generates_several_blocks.reference | 1922 ----------------- ...duplication_mv_generates_several_blocks.sh | 103 - ...tes_several_blocks_nonreplicated.reference | 
962 +++++++++ ..._generates_several_blocks_nonreplicated.sh | 58 + ...erates_several_blocks_replicated.reference | 962 +++++++++ ..._mv_generates_several_blocks_replicated.sh | 58 + ...cation_several_mv_into_one_table.reference | 1410 ------------ ...deduplication_several_mv_into_one_table.sh | 111 - ..._mv_into_one_table_nonreplicated.reference | 706 ++++++ ...several_mv_into_one_table_nonreplicated.sh | 58 + ...ral_mv_into_one_table_replicated.reference | 706 ++++++ ...on_several_mv_into_one_table_replicated.sh | 58 + 18 files changed, 5608 insertions(+), 5565 deletions(-) delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference delete mode 100755 
tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference deleted file mode 100644 index 641735d1bb6..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b 
-count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: 
insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even 
-count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 
20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 
-table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 69: insert_method=InsertValues 
engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 
-0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 89: 
insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 
-table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 102: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 
-0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 
-0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh deleted file mode 100755 index ed50110b7eb..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# fails, it is a error. Several blocks in scr table with the same user token are processed in parallel and deduplicated - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False" -# fails, it is a error. The same situation as first one, but on dst table. 
- -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 
1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..bf900aa84d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + 
+Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test 
case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test 
case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..c758e2fb3de --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table 
$deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference new file mode 100644 index 00000000000..c815324b455 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even 
+count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 
+table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree 
use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 
+table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + 
+Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh new file mode 100755 index 00000000000..45b222b1fc4 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != 
"$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference deleted file mode 100644 index 06f30793670..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.reference +++ /dev/null @@ -1,1922 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 3: insert_method=InsertSelect 
engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 
-table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK 
- -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 40: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 
46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 52: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 56: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 58: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 64: 
insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b 
-count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b 
-count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - 
-Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -OK - -Test case 101: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -FIXED - -Test case 107: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 90 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 45 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 112: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test 
case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 
119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 47 -0 -0 -FIXED - -Test case 
125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 47 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 94 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 45 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 90 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh deleted file mode 100755 index 61996905135..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# failed due to race in multi thread insertion, blocks are deduplicated in different threads - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# the same as first but for dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst table deduplicates all incoming blocks from one insert because not uniq hash - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True" - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token 
$use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..76ef4cf6b2c --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined 
+count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 
+table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 
+table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: 
insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 
+table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 
+table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..50cf2a3bb75 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference new file mode 100644 index 00000000000..a84539df16b --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + 
+Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 6: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 12: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 18: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 24: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 36: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 42: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 45 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 48: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 54: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +Test case 60: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 47 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 47 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 94 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 45 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 90 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh new file mode 100755 index 00000000000..2b094e0309e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference deleted file mode 100644 index 4d517948a25..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.reference +++ /dev/null @@ -1,1410 +0,0 @@ - -Test case 0: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 41: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 42: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 43: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 44: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 45: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 46: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 49: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 50: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 53: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 54: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 57: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 58: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 61: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 62: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 
63: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 64: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 65: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 66: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 67: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 68: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 69: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 70: insert_method=InsertValues engine=MergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 71: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 72: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 73: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 74: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 75: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 76: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 77: insert_method=InsertValues engine=MergeTree use_insert_token=True 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 78: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 79: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 80: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 81: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 82: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 83: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 84: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 85: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 86: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 87: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 88: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 89: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 90: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 91: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 92: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 93: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 94: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 95: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 96: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 97: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 98: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 99: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 100: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 101: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 102: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 103: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 104: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 105: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -FIXED - -Test case 106: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -FIXED - -Test case 107: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -FIXED - -Test case 108: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 109: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -FIXED - -Test case 110: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 111: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 112: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 113: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 114: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 115: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 116: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 117: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -FIXED - -Test case 118: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 
119: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 120: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -FIXED - -Test case 121: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -FIXED - -Test case 122: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 123: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 124: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -FIXED - -Test case 125: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 
-table_dst count 2 -0 -0 -FIXED - -Test case 126: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 127: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh deleted file mode 100755 index 3d2814ed77d..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Test case 8: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 9: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 10: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -# Test case 11: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -# race condition on insert into src table - -# Test case 12: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 13: engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# race condition on insert into dst table - -# Test case 16: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# Test case 24: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -# Test case 28: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -# dst deduplicates blocks from one inserts from different materialized view - -# Test case 17: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 21: engine=MergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# Test case 25: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -# Test case 29: engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -# dst deduplicates blocks from different inserts by hash - -KNOWN_ERRORS=(8 9 10 11 12 13 16 20 24 28 17 21 25 29) - -function is_known_error() -{ - n=$1 - for e in "${KNOWN_ERRORS[@]}"; do - if [ "$n" -eq "$e" ] || [ "$n" -eq "$((e+32))" ] || [ "$n" -eq "$((e+64))" ] || [ "$n" -eq "$((e+64+32))" ]; then - return 0 - fi - done - return 1 -} - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for engine in "MergeTree" "ReplicatedMergeTree"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$engine" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - is_error=$(is_known_error "$i" && echo Y || echo N) - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - if [ "$is_error" = Y ]; then - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 
$CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " 2>/dev/null && echo FIXED || echo EXPECTED_TO_FAIL - else - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $engine \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - fi - done - done - done - done - done - done -done - -echo -echo "All cases executed" - - diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference new file mode 100644 index 00000000000..b6a3e0175a7 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect 
engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False 
+table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 
+table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test 
case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh new file mode 100755 index 00000000000..33da54b90f1 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference new file mode 100644 index 00000000000..1921103f49e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference @@ 
-0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst 
count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 
16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 
+table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 
+table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True 
+table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh new file mode 100755 index 00000000000..290d1f794b2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ 
"$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" From ddde0f5fed1a8d3f57e743f54b2d14dcdaf98908 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 31 May 2024 16:25:03 +0200 Subject: [PATCH 072/273] fix headers --- src/Common/CollectionOfDerived.h | 4 +- src/Interpreters/InterpreterInsertQuery.cpp | 50 +++++++++---------- src/Interpreters/SquashingTransform.cpp | 18 +++---- src/Processors/Chunk.h | 6 --- src/Processors/ISimpleTransform.h | 2 - .../Algorithms/ReplacingSortedAlgorithm.h | 3 +- src/Processors/Sinks/SinkToStorage.h | 3 -- .../Transforms/AggregatingInOrderTransform.h | 2 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/CountingTransform.cpp | 6 +-- src/Processors/Transforms/CountingTransform.h | 2 - ...m.cpp => DeduplicationTokenTransforms.cpp} | 12 ++++- ...sform.h => DeduplicationTokenTransforms.h} | 15 +----- src/Processors/Transforms/JoiningTransform.h | 5 +- .../Transforms/MaterializingTransform.cpp | 2 - ...ergingAggregatedMemoryEfficientTransform.h | 2 +- .../Transforms/SquashingChunksTransform.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 7 ++- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 15 +++--- .../MergeTree/ReplicatedMergeTreeSink.cpp | 26 +++++----- src/Storages/StorageDistributed.cpp | 2 - src/Storages/StorageLog.cpp | 3 +- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 24 
files changed, 88 insertions(+), 106 deletions(-) rename src/Processors/Transforms/{NumberBlocksTransform.cpp => DeduplicationTokenTransforms.cpp} (91%) rename src/Processors/Transforms/{NumberBlocksTransform.h => DeduplicationTokenTransforms.h} (89%) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index c98e375b4b1..60a91e593f9 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -41,7 +43,7 @@ private: using Records = std::vector; public: - void swap(Self & other) + void swap(Self & other) noexcept { records.swap(other.records); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 249c69b51b9..758ac4ab954 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -309,8 +309,8 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - LOG_DEBUG(getLogger("InsertQuery"), - "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); + // LOG_DEBUG(getLogger("InsertQuery"), + // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); ThreadStatus * thread_status = current_thread; @@ -413,9 +413,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink sink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink sink_streams table 
name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,9 +425,9 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - LOG_DEBUG(getLogger("InsertQuery"), - "call buildSink presink_streams table name {}.{}, stream {}/{}", - table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); + // LOG_DEBUG(getLogger("InsertQuery"), + // "call buildSink presink_streams table name {}.{}, stream {}/{}", + // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); @@ -462,8 +462,8 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); - LOG_DEBUG(getLogger("InsertQuery"), - "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); if (is_trivial_insert_select) { @@ -511,9 +511,9 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - LOG_DEBUG(getLogger("InsertQuery"), - "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); + // LOG_DEBUG(getLogger("InsertQuery"), + // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", + // 
pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. @@ -743,13 +743,13 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - bool is_table_dist = false; - if (auto * dist_storage = dynamic_cast(table.get())) - { - is_table_dist = true; - LOG_DEBUG(getLogger("InsertQuery"), - "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - } + // bool is_table_dist = false; + // if (auto * dist_storage = dynamic_cast(table.get())) + // { + // is_table_dist = true; + // // LOG_DEBUG(getLogger("InsertQuery"), + // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); + // } if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +780,24 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); 
res.pipeline = buildInsertSelectPipeline(query, table); } } else { - LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); + // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 30c801aaaff..a539870d50c 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,10 +72,10 @@ void SquashingTransform::append(Block && input_block) return; } - LOG_DEBUG(getLogger("SquashingTransform"), - "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - input_block.rows(), input_block.bytes(), input_block.columns(), - accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", + // input_block.rows(), input_block.bytes(), input_block.columns(), + // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); assert(blocksHaveEqualStructure(input_block, accumulated_block)); @@ -86,11 +86,11 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - LOG_DEBUG(getLogger("SquashingTransform"), - "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - i, source_column->getName(), - acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); + // LOG_DEBUG(getLogger("SquashingTransform"), + // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", + // i, source_column->getName(), + // 
acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), + // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); auto mutable_column = IColumn::mutate(std::move(acc_column)); diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index b4345d18a08..1348966c0d3 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,15 +1,9 @@ #pragma once -#include "base/defines.h" - #include #include -#include -#include #include -#include -#include namespace DB { diff --git a/src/Processors/ISimpleTransform.h b/src/Processors/ISimpleTransform.h index a47e0e49121..629529cdffa 100644 --- a/src/Processors/ISimpleTransform.h +++ b/src/Processors/ISimpleTransform.h @@ -2,8 +2,6 @@ #include -#include - namespace DB { diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index f36e07b8a96..2f23f2a5c4d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include #include #include -#include "Processors/Chunk.h" +#include namespace Poco { diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c350b9f79b0..c728fa87b1e 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -1,9 +1,6 @@ #pragma once -#include #include -#include #include -#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 6433f862dfd..41a0d7fc7f1 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,7 @@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingTransform.h 
b/src/Processors/Transforms/AggregatingTransform.h index 430a9a6e50a..95983c39d1e 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -1,15 +1,14 @@ #pragma once -#include #include #include #include +#include #include #include #include #include #include #include -#include "Processors/Chunk.h" namespace CurrentMetrics { diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index d39c6575292..2c6b3bd8638 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,9 +1,9 @@ - -#include #include + +#include +#include #include #include -#include "IO/Progress.h" namespace ProfileEvents diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 4efcf147ac7..05d8e2aeac8 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/NumberBlocksTransform.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp similarity index 91% rename from src/Processors/Transforms/NumberBlocksTransform.cpp rename to src/Processors/Transforms/DeduplicationTokenTransforms.cpp index d51fe67c868..ea4537bb5ad 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -18,6 +18,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +void RestoreChunkInfosTransform::transform(Chunk & chunk) +{ + LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", + chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), + chunk_infos.size(), chunk_infos.debug(), + chunk.getNumRows()); + + 
chunk.getChunkInfos().append(chunk_infos.clone()); +} + namespace DeduplicationToken { diff --git a/src/Processors/Transforms/NumberBlocksTransform.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h similarity index 89% rename from src/Processors/Transforms/NumberBlocksTransform.h rename to src/Processors/Transforms/DeduplicationTokenTransforms.h index a2e48d9b548..f0bcc3052f7 100644 --- a/src/Processors/Transforms/NumberBlocksTransform.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -14,22 +14,11 @@ namespace DB RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) : ISimpleTransform(header_, header_, true) , chunk_infos(std::move(chunk_infos_)) - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "create RestoreChunkInfosTransform to append {}:{}", - chunk_infos.size(), chunk_infos.debug()); - } + {} String getName() const override { return "RestoreChunkInfosTransform"; } - void transform(Chunk & chunk) override - { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - - chunk.getChunkInfos().append(chunk_infos.clone()); - } + void transform(Chunk & chunk) override; private: Chunk::ChunkInfoCollection chunk_infos; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5fdea2524e2..5f6d9d6fff2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,8 +1,7 @@ #pragma once -#include #include -#include "Processors/Chunk.h" - +#include +#include namespace DB { diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 4a7f5187c75..9ae80e21a68 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ 
b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 958b43b11ed..3a3c1bd9c1e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -2,8 +2,8 @@ #include #include -#include "Processors/Chunk.h" #include +#include #include #include #include diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f0334549d4c..860e84f2cd3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -4,7 +4,7 @@ #include #include #include -#include "Processors/Chunk.h" +#include namespace DB { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7a32b6ff038..bef00fa3f1d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -15,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -24,9 +25,7 @@ #include #include #include -#include "Core/Field.h" -#include -#include +#include #include #include diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index b9d29a90f56..dd20bea4dd6 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -22,12 +22,12 @@ limitations under the License. 
*/ #include #include #include +#include #include #include #include #include #include -#include "Processors/Transforms/NumberBlocksTransform.h" #include #include diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0953cdc5d72..ba81bb7a56d 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,12 +1,13 @@ -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include "Common/Exception.h" -#include -#include "Interpreters/StorageID.h" + +#include namespace ProfileEvents { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 62d30764ca8..16bb9827c6e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,23 +1,25 @@ -#include -#include -#include -#include -#include -#include #include "Common/Exception.h" #include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include + #include +#include + namespace ProfileEvents { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 257c8c312e5..5e03840fa36 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -23,7 +23,6 @@ #include -#include "Common/logger_useful.h" #include #include #include @@ -107,7 +106,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 8b1bf4637b4..1a84f578cf8 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,7 +1,7 @@ #include #include +#include -#include "Common/logger_useful.h" #include #include #include @@ -22,7 +22,6 @@ #include #include -#include 
"StorageLogSettings.h" #include #include #include diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4ae91d64023..17ecba2b4a5 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,12 +32,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include From 6dfd226daa8421055f3a1103fa72323c68c71959 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 17:27:13 +0200 Subject: [PATCH 073/273] fix populate --- .../DeduplicationTokenTransforms.cpp | 29 ++++++++++--------- .../Transforms/DeduplicationTokenTransforms.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index ea4537bb5ad..4f822e4aebb 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { + LOG_DEBUG(getLogger("TokenInfo"), + "token: {}, stage: {}, view id: {}", + getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format(":view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -115,7 +118,18 @@ void CheckTokenTransform::transform(Chunk & chunk) LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); } -void SetInitialTokenTransform::setInitialToken(Chunk & chunk) +String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +{ + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + return toString(hash_value.items[0]) + "_" + 
toString(hash_value.items[1]); +} + + +void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -129,18 +143,7 @@ void SetInitialTokenTransform::setInitialToken(Chunk & chunk) if (token_info->tokenInitialized()) return; - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - token_info->setInitialToken(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); -} - - -void SetInitialTokenTransform::transform(Chunk & chunk) -{ - setInitialToken(chunk); + token_info->setInitialToken(getInitialToken(chunk)); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index f0bcc3052f7..46d355eb487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -111,7 +111,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static void setInitialToken(Chunk & chunk); + static String getInitialToken(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index bef00fa3f1d..b259e803f80 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -529,8 +529,6 @@ Chain buildPushingToViewsChain( result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); result_chain.setConcurrencyControl(settings.use_concurrency_control); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } if (auto * live_view = dynamic_cast(storage.get())) @@ -538,12 +536,25 @@ Chain buildPushingToViewsChain( auto sink = 
std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } + else if (dynamic_cast(storage.get())) + { + auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) From d72fac13ec7d02d35e49be8f799c82c4762b242b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 18:58:05 +0200 Subject: [PATCH 074/273] mark insert block with a set of block ids from all partitions --- .../DeduplicationTokenTransforms.cpp | 26 ++++++-- .../Transforms/DeduplicationTokenTransforms.h | 3 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 ++- .../MergeTree/ReplicatedMergeTreeSink.cpp | 7 ++- ...on_insert_into_partitioned_table.reference | 35 +++++++++++ ...lication_insert_into_partitioned_table.sql | 63 +++++++++++++++++++ 6 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 
4f822e4aebb..dba6fc40b11 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -39,15 +39,24 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const result.reserve(getTotalSize()); for (const auto & part : parts) + { + if (!result.empty()) + result.append(":"); result.append(part); + } return result; } -void DB::DeduplicationToken::TokenInfo::setInitialToken(String part) +void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) { chassert(stage == INITIAL); addTokenPart(std::move(part)); +} + +void DB::DeduplicationToken::TokenInfo::closeInitialToken() +{ + chassert(stage == INITIAL); stage = VIEW_ID; } @@ -61,7 +70,7 @@ void TokenInfo::setUserToken(const String & token) void TokenInfo::setSourceBlockNumber(size_t sbn) { chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format(":source-number-{}", sbn)); + addTokenPart(fmt::format("source-number-{}", sbn)); stage = VIEW_ID; } @@ -71,14 +80,14 @@ void TokenInfo::setViewID(const String & id) "token: {}, stage: {}, view id: {}", getToken(false), stage, id); chassert(stage == VIEW_ID); - addTokenPart(fmt::format(":view-id-{}", id)); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } void TokenInfo::setViewBlockNumber(size_t mvbn) { chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format(":view-block-{}", mvbn)); + addTokenPart(fmt::format("view-block-{}", mvbn)); stage = VIEW_ID; } @@ -96,10 +105,14 @@ void TokenInfo::addTokenPart(String part) size_t TokenInfo::getTotalSize() const { + if (parts.empty()) + return 0; + size_t size = 0; for (const auto & part : parts) size += part.size(); - return size; + + return size + parts.size() - 1; } void CheckTokenTransform::transform(Chunk & chunk) @@ -143,7 +156,8 @@ void SetInitialTokenTransform::transform(Chunk & chunk) if (token_info->tokenInitialized()) return; - 
token_info->setInitialToken(getInitialToken(chunk)); + token_info->addPieceToInitialToken(getInitialToken(chunk)); + token_info->closeInitialToken(); } void SetUserTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 46d355eb487..27bb21dfad1 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,7 +38,8 @@ namespace DeduplicationToken bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void setInitialToken(String part); + void addPieceToInitialToken(String part); + void closeInitialToken(); void setUserToken(const String & token); void setSourceBlockNumber(size_t sbn); void setViewID(const String & id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index ba81bb7a56d..b31e7e6a562 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -147,7 +147,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -194,6 +194,11 @@ void MergeTreeSink::consume(Chunk & chunk) }); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 16bb9827c6e..8cb4095f1e6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -393,7 +393,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->tokenInitialized()) { chassert(temp_part.part); - token_info->setInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); } } @@ -440,6 +440,11 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } + if (!token_info->tokenInitialized()) + { + token_info->closeInitialToken(); + } + finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference new file mode 100644 index 00000000000..e69cf2be182 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -0,0 +1,35 @@ +no user deduplication token +partitioned_table: +1 A +1 D +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with user deduplication token +partitioned_table: +1 A +1 A +1 D +2 B +2 B +2 C +mv_table: +1 A +1 A +1 D +2 B +2 B +2 C +with incorrect ussage of user deduplication token +partitioned_table: +1 A +2 B +mv_table: +1 A +2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql new file mode 100644 index 00000000000..918b7f2553d --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS partitioned_table; +DROP TABLE IF EXISTS mv_table; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED 
VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is not deduplicated because different tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because different tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + + +SELECT 'with incorrect ussage of user deduplication token'; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated because equal tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; 
+SELECT 'mv_table is deduplicated because equal tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +TRUNCATE TABLE partitioned_table; +TRUNCATE TABLE mv_table; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; From dbc07ec573d3310b4f5019b8887fd34288bf23cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jun 2024 20:28:38 +0200 Subject: [PATCH 075/273] adjust tests --- ...02912_ingestion_mv_deduplication.reference | 2 +- .../02912_ingestion_mv_deduplication.sql | 2 +- ...on_insert_into_partitioned_table.reference | 12 ++--- ...lication_insert_into_partitioned_table.sql | 44 ++++++++++++++----- 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index ae82b9c0463..07deb7c2565 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -17,7 +17,7 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index 450d92476a9..a2378fd8f67 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -98,7 +98,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables + - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables */ SET 
deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference index e69cf2be182..c82a6eaa213 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -1,10 +1,10 @@ no user deduplication token -partitioned_table: +partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: 1 A 1 D 2 B 2 C -mv_table: +mv_table is not deduplicated because the inserted blocks was different: 1 A 1 A 1 D @@ -12,14 +12,14 @@ mv_table: 2 B 2 C with user deduplication token -partitioned_table: +partitioned_table is not deduplicated because different tokens: 1 A 1 A 1 D 2 B 2 B 2 C -mv_table: +mv_table is not deduplicated because different tokens: 1 A 1 A 1 D @@ -27,9 +27,9 @@ mv_table: 2 B 2 C with incorrect ussage of user deduplication token -partitioned_table: +partitioned_table is deduplicated because equal tokens: 1 A 2 B -mv_table: +mv_table is deduplicated because equal tokens: 1 A 2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql index 918b7f2553d..2eb931f7f73 100644 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -1,6 +1,12 @@ DROP TABLE IF EXISTS partitioned_table; DROP TABLE IF EXISTS mv_table; + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + CREATE TABLE partitioned_table (key Int64, value String) ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') @@ -12,11 +18,6 @@ CREATE MATERIALIZED VIEW mv_table (key Int64, value String) ORDER BY tuple() AS SELECT key, value FROM partitioned_table; -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); @@ -26,12 +27,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with user deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); @@ -41,12 +53,23 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is not deduplicated because different tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; +DROP TABLE partitioned_table; +DROP TABLE mv_table; SELECT 'with incorrect ussage of user 
deduplication token'; +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); @@ -56,8 +79,5 @@ SELECT * FROM partitioned_table ORDER BY ALL; SELECT 'mv_table is deduplicated because equal tokens:'; SELECT * FROM mv_table ORDER BY ALL; -TRUNCATE TABLE partitioned_table; -TRUNCATE TABLE mv_table; - DROP TABLE partitioned_table; DROP TABLE mv_table; From 273571c6f519b99c556b3b443b391e5dc592a682 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 7 Jun 2024 19:05:19 +0200 Subject: [PATCH 076/273] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index b259e803f80..8ba172bf32b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -562,6 +562,9 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); + + result_chain.addSource(std::make_shared(sink->getHeader())); + 
result_chain.addSource(std::move(sink)); } From 3db3b365ea46ee1fc388a1788ce59b9426b99c71 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 15:42:13 +0200 Subject: [PATCH 077/273] fix tests --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8ba172bf32b..ed44a20e397 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -567,6 +567,10 @@ Chain buildPushingToViewsChain( result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(storage_header)); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); From bdcf3a0739580c8c1e9689dfa416b2fae07feed7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 10 Jun 2024 22:16:26 +0200 Subject: [PATCH 078/273] fix tidy build --- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b31e7e6a562..faf3267a759 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -189,7 +189,7 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = std::move(block_dedup_token), + .block_dedup_token = block_dedup_token, .part_counters = std::move(part_counters), }); } From 24bf946c00bc9e681ec4b26dbdae0a7a786bf355 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 19:44:49 +0200 Subject: [PATCH 079/273] rm debug printing --- src/Core/Settings.h | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 31 ----------------- src/Interpreters/SquashingTransform.cpp | 12 ------- .../DeduplicationTokenTransforms.cpp | 11 ------ 
.../Transforms/ExpressionTransform.cpp | 2 -- .../Transforms/SquashingChunksTransform.cpp | 18 ---------- src/Storages/MergeTree/MergeTreeSink.cpp | 34 ------------------- .../MergeTree/MergedBlockOutputStream.cpp | 3 -- .../MergeTree/ReplicatedMergeTreeSink.cpp | 13 ------- src/Storages/WindowView/StorageWindowView.cpp | 11 ------ 10 files changed, 136 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4128f24052b..d6779a531ae 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 758ac4ab954..64fccdbe14d 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -309,9 +309,6 @@ Chain InterpreterInsertQuery::buildSink( ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "called InterpreterInsertQuery::buildSink() engine {} table name {}.{}", table->getName(), table->getStorageID().database_name, table->getStorageID().table_name); - ThreadStatus * thread_status = current_thread; if (!thread_status_holder) @@ -413,10 +410,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < sink_streams; ++i) { - // LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink sink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, sink_streams); - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, running_group, /* elapsed_counter_ms= */ nullptr); @@ -425,10 +418,6 @@ std::pair, std::vector> InterpreterInsertQuery::buildP for (size_t i = 0; i < presink_streams; ++i) { - // 
LOG_DEBUG(getLogger("InsertQuery"), - // "call buildSink presink_streams table name {}.{}, stream {}/{}", - // table->getStorageID().database_name, table->getStorageID().table_name, i, presink_streams); - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); presink_chains.emplace_back(std::move(out)); } @@ -462,9 +451,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & ContextPtr select_context = getContext(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "execute() is_trivial_insert_select {} prefersLargeBlocks={} max_insert_threads {}", is_trivial_insert_select, table->prefersLargeBlocks(), settings.max_insert_threads); - if (is_trivial_insert_select) { /** When doing trivial INSERT INTO ... SELECT ... FROM table, @@ -511,11 +497,6 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.dropTotalsAndExtremes(); - // LOG_DEBUG(getLogger("InsertQuery"), - // "adding transforms, pipline size {}, threads {}, max_insert_threads {}", - // pipeline.getNumStreams(), pipeline.getNumThreads(), settings.max_insert_threads); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
if (getContext()->getSettingsRef().insert_null_as_default) { @@ -743,14 +724,6 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - // bool is_table_dist = false; - // if (auto * dist_storage = dynamic_cast(table.get())) - // { - // is_table_dist = true; - // // LOG_DEBUG(getLogger("InsertQuery"), - // // "dist_storage engine {} table name {}.{}", dist_storage->getName(), dist_storage->getStorageID().database_name, dist_storage->getStorageID().table_name); - // } - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); @@ -780,24 +753,20 @@ BlockIO InterpreterInsertQuery::execute() auto distributed = table->distributedWrite(query, getContext()); if (distributed) { - // LOG_DEBUG(getLogger("InsertQuery"),"as dist pipeline, is_table_dist {}", is_table_dist); res.pipeline = std::move(*distributed); } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select after dist, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as insert select, is_table_dist {}", is_table_dist); res.pipeline = buildInsertSelectPipeline(query, table); } } else { - // LOG_DEBUG(getLogger("InsertQuery"),"as just insert, is_table_dist {}", is_table_dist); res.pipeline = buildInsertPipeline(query, table); } diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index a539870d50c..27437d1b647 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -72,11 +72,6 @@ void SquashingTransform::append(Block && input_block) return; } - // LOG_DEBUG(getLogger("SquashingTransform"), - // "input_block rows {}, size {}, columns {}, accumulated_block rows {}, size {}, columns {}, ", - // input_block.rows(), input_block.bytes(), 
input_block.columns(), - // accumulated_block.rows(), accumulated_block.bytes(), accumulated_block.columns()); - assert(blocksHaveEqualStructure(input_block, accumulated_block)); try @@ -86,13 +81,6 @@ void SquashingTransform::append(Block && input_block) const auto source_column = std::move(input_block.getByPosition(i).column); auto acc_column = std::move(accumulated_block.getByPosition(i).column); - // LOG_DEBUG(getLogger("SquashingTransform"), - // "column {} {}, acc rows {}, size {}, allocated {}, input rows {} size {} allocated {}", - // i, source_column->getName(), - // acc_column->size(), acc_column->byteSize(), acc_column->allocatedBytes(), - // source_column->size(), source_column->byteSize(), source_column->allocatedBytes()); - - auto mutable_column = IColumn::mutate(std::move(acc_column)); mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); accumulated_block.getByPosition(i).column = std::move(mutable_column); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index dba6fc40b11..0701e958877 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -20,11 +20,6 @@ namespace ErrorCodes void RestoreChunkInfosTransform::transform(Chunk & chunk) { - LOG_TRACE(getLogger("RestoreChunkInfosTransform"), "chunk infos before: {}:{}, append: {}:{}, chunk has rows {}", - chunk.getChunkInfos().size(), chunk.getChunkInfos().debug(), - chunk_infos.size(), chunk_infos.debug(), - chunk.getNumRows()); - chunk.getChunkInfos().append(chunk_infos.clone()); } @@ -76,9 +71,6 @@ void TokenInfo::setSourceBlockNumber(size_t sbn) void TokenInfo::setViewID(const String & id) { - LOG_DEBUG(getLogger("TokenInfo"), - "token: {}, stage: {}, view id: {}", - getToken(false), stage, id); chassert(stage == VIEW_ID); addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; @@ -146,8 +138,6 @@ 
void SetInitialTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); - LOG_DEBUG(getLogger("SetInitialTokenTransform"), "has token_info {}", bool(token_info)); - if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -208,7 +198,6 @@ void ResetTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - LOG_DEBUG(getLogger("ResetTokenTransform"), "token_info was {}", token_info->getToken(false)); token_info->reset(); } diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 73d41828bc0..04fabc9a3c6 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace DB { diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 531d264a25a..75228eb5c2d 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -17,9 +17,6 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "onConsume {}", chunk.getNumRows()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); cur_chunk = Chunk(result.block.getColumns(), result.block.rows()); @@ -36,11 +33,6 @@ void SquashingChunksTransform::onConsume(Chunk chunk) cur_chunk.setChunkInfos(chunk.getChunkInfos()); cur_chunkinfos = {}; } - - // LOG_DEBUG(getLogger("SquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // cur_chunk.getNumRows(), cur_chunk.bytes(), cur_chunk.getNumColumns(), - // cur_chunk.getChunkInfos().size(), cur_chunk.getChunkInfos().debug()); } else { @@ -90,11 +82,6 @@ 
SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "transform rows {}, size {}, columns {}, infos: {}/{}", - // chunk.getNumRows(), chunk.bytes(), chunk.getNumColumns(), - // chunk.getChunkInfos().size(), chunk.getChunkInfos().debug()); - auto result = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); if (result.block) @@ -110,11 +97,6 @@ void SimpleSquashingChunksTransform::consume(Chunk chunk) squashed_chunk.setChunkInfos(chunk.getChunkInfos()); squashed_info = {}; } - - // LOG_DEBUG(getLogger("SimpleSquashingChunksTransform"), - // "got result rows {}, size {}, columns {}, infos: {}/{}", - // squashed_chunk.getNumRows(), squashed_chunk.bytes(), squashed_chunk.getNumColumns(), - // squashed_chunk.getChunkInfos().size(), squashed_chunk.getChunkInfos().debug()); } else { diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index faf3267a759..1fdcd4c5b74 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -50,8 +50,6 @@ MergeTreeSink::MergeTreeSink( , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { - LOG_INFO(storage.log, "MergeTreeSink() called for {}.{}", - storage_.getStorageID().database_name, storage_.getStorageID().getTableName()); } void MergeTreeSink::onStart() @@ -68,10 +66,6 @@ void MergeTreeSink::onFinish() void MergeTreeSink::consume(Chunk & chunk) { - LOG_INFO(storage.log, "consume() called num_blocks_processed {}, chunks: rows {} columns {} bytes {}", - num_blocks_processed, - chunk.getNumRows(), chunk.getNumColumns(), chunk.bytes()); - if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); @@ -81,8 +75,6 @@ void MergeTreeSink::consume(Chunk & chunk) auto part_blocks = 
MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); - LOG_INFO(storage.log, "consume() called part_blocks.count {}", part_blocks.size()); - using DelayedPartitions = std::vector; DelayedPartitions partitions; @@ -106,18 +98,7 @@ void MergeTreeSink::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } for (auto & current_block : part_blocks) @@ -162,13 +143,6 @@ void MergeTreeSink::consume(Chunk & chunk) else max_insert_delayed_streams_for_parallel_write = 0; - LOG_INFO(storage.log, "consume() called for {}.{} " - "streams {} + {} -> {}, " - "max {} support_parallel_write {}", - storage.getStorageID().database_name, storage.getStorageID().getTableName(), - streams, temp_part.streams.size(), streams + temp_part.streams.size(), - max_insert_delayed_streams_for_parallel_write, support_parallel_write); - /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); @@ -211,12 +185,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - LOG_INFO(storage.log, "finishDelayedChunk() called partitions count {}", delayed_chunk->partitions.size()); - for (auto & partition : delayed_chunk->partitions) { - LOG_INFO(storage.log, "finishDelayedChunk() part name {} dedup_token {}", partition.temp_part.part->name, partition.block_dedup_token); - ProfileEventsScope scoped_attach(&partition.part_counters); partition.temp_part.finalize(); @@ -234,14 +204,10 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - LOG_INFO(storage.log, "finishDelayedChunk() has dedup log {}", bool(deduplication_log)); - if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - LOG_INFO(storage.log, "finishDelayedChunk() block_dedup_token={}, block_id={}", partition.block_dedup_token, block_id); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 09cdc6a78bc..c5799fab09f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -336,9 +336,6 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) { - LOG_DEBUG(getLogger("MergedBlockOutputStream()"), "writeImpl block rows {} size {} getPartDirectory {}", - block.rows(), block.bytes(), data_part_storage->getPartDirectory()); - block.checkNumberOfRows(); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 8cb4095f1e6..cf3af59118e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -311,20 +311,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) context->getSettingsRef().insert_deduplication_token.value); if (token_info->tokenInitialized()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block block_dedup_token = token_info->getToken(); - - LOG_DEBUG(storage.log, - "dedup token from insert deduplication token in chunk: {}", - block_dedup_token); - } - else - { - LOG_DEBUG(storage.log, - "dedup token from hash is calculated"); - } } auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 17ecba2b4a5..d4f6621b4fc 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1415,11 +1415,6 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: rows {}, infos {} with {}, window column {}", - block.rows(), - chunk_infos.size(), chunk_infos.debug(), - window_view.timestamp_column_name); - window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -1464,9 +1459,6 @@ void StorageWindowView::writeIntoWindowView( lateness_bound = t_max_fired_watermark; } - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: lateness_bound {}, window_view.is_proctime {}", - lateness_bound, window_view.is_proctime); - if (lateness_bound > 0) /// Add filter, which leaves rows with timestamp 
>= lateness_bound { auto filter_function = makeASTFunction( @@ -1583,9 +1575,6 @@ void StorageWindowView::writeIntoWindowView( if (block_max_timestamp) window_view.updateMaxTimestamp(block_max_timestamp); - - LOG_TRACE(getLogger("StorageWindowView"), "writeIntoWindowView: block_max_timestamp {}", - block_max_timestamp); } UInt32 lateness_upper_bound = 0; From 4998c5888e6723f81627a14799ae0ade7676189b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 20:57:06 +0200 Subject: [PATCH 080/273] depricate update_insert_deduplication_token_in_dependent_materialized_views --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d6779a531ae..8ab66ba2a3e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,6 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 3345f27b645838b058243a91bd69c8383f812324 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 11 Jun 2024 22:38:54 +0200 Subject: [PATCH 081/273] fix typo --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8ab66ba2a3e..27201cc6cf0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -626,7 +626,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Depricated.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ From 89234371438b94706fe903b9e55a30651bed0238 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 19:36:23 +0200 Subject: [PATCH 082/273] add tests for cases from docs --- src/Core/Settings.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 7 +- ...08_deduplication_cases_from_docs.reference | 41 +++ .../03008_deduplication_cases_from_docs.sql | 331 ++++++++++++++++++ 5 files changed, 381 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 27201cc6cf0..1bfb5a1e18f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -624,7 +624,7 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. 
toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ed44a20e397..b35b6266735 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -441,9 +442,7 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = false; - if 
(!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 1fdcd4c5b74..4e20eade589 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,5 +1,8 @@ +#include +#include #include #include +#include #include #include #include @@ -185,6 +188,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + const Settings & settings = context->getSettingsRef(); + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -204,7 +209,7 @@ void MergeTreeSink::finishDelayedChunk() auto * deduplication_log = storage.getDeduplicationLog(); - if (deduplication_log) + if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference new file mode 100644 index 00000000000..4893274c1cd --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference @@ -0,0 +1,41 @@ +Different materialized view insert into one underlayed table equal data. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +second attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Different insert operations generate the same data after transformation in underlied table of materialized view. 
+first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +second attempt +from dst 1 A all_1_1_0 +from dst 2 A all_2_2_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Indentical blocks in insertion with `insert_deduplication_token` +first attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +second attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +third attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +Indentical blocks in insertion +from dst 0 A all_1_1_0 +Indentical blocks after materialised view`s transformation +first attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 +second attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql new file mode 100644 index 00000000000..7927a6b1edf --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql @@ -0,0 +1,331 @@ +-- ######### +select 'Different materialized view insert into one underlayed table equal data.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; +DROP TABLE IF EXISTS mv_first; +DROP TABLE IF EXISTS mv_second; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE TABLE mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_first +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +CREATE MATERIALIZED VIEW mv_second +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; 
+ +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_second; +DROP TABLE mv_first; +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (2, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion with `insert_deduplication_token`'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +select 'first attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'second 
attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'third attempt'; + +INSERT INTO dst SELECT + 1 AS key, + 'b' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks after materialised view`s transformation'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part 
+FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; From 63852d9b0015b47ec93e2b7755c14bb7b002fcbd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 12 Jun 2024 20:45:29 +0200 Subject: [PATCH 083/273] fix fast test 00633_materialized_view_and_too_many_parts_zookeeper --- .../00633_materialized_view_and_too_many_parts_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 1fb219108da..8f7d19028b0 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" From dd28c052671651dd0891ad4c8a0a43f9842a6ce3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 20:20:39 +0000 Subject: [PATCH 084/273] fix some cases --- .../Passes/FunctionToSubcolumnsPass.cpp | 17 +++++ .../array/FunctionsMapMiscellaneous.cpp | 6 +- 
...functions_to_subcolumns_analyzer.reference | 72 +++++++++---------- ...71_function_to_subcolumns_fuzzer.reference | 3 + .../03171_function_to_subcolumns_fuzzer.sql | 39 ++++++++++ 5 files changed, 97 insertions(+), 40 deletions(-) create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference create mode 100644 tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 030feac65dc..9cfd22cbef5 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -269,10 +270,24 @@ public: enterImpl(*function_node, *first_argument_node, *table_node); return; } + + if (const auto * join_node = node->as()) + { + has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + return; + } } std::unordered_set getIdentifiersToOptimize() const { + if (has_join_use_nulls) + { + /// Do not optimize if we have JOIN with setting join_use_null. + /// It may change the behaviour if subcolumn can be coverted + /// to nullable while the original column cannot. + return {}; + } + /// Do not optimize if full column is requested in other context. /// It doesn't make sense because it doesn't reduce amount of read data /// and optimized functions are not computation heavy. 
But introducing @@ -306,7 +321,9 @@ private: std::unordered_set all_key_columns; std::unordered_map identifiers_count; std::unordered_map optimized_identifiers_count; + NameSet processed_tables; + bool has_join_use_nulls = false; void enterImpl(const TableNode & table_node) { diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp index 76c1ec18171..c3586a57161 100644 --- a/src/Functions/array/FunctionsMapMiscellaneous.cpp +++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp @@ -51,6 +51,8 @@ public: bool isVariadic() const override { return impl.isVariadic(); } size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns(); } bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; } @@ -184,7 +186,7 @@ struct MapToNestedAdapter : public MapAdapterBase struct MapToSubcolumnAdapter { - static_assert(position <= 1); + static_assert(position <= 1, "position of Map subcolumn must be 0 or 1"); static void extractNestedTypes(DataTypes & types) { @@ -357,7 +359,7 @@ struct NameMapValues { static constexpr auto name = "mapValues"; }; using FunctionMapValues = FunctionMapToArrayAdapter, NameMapValues>; struct NameMapContains { static constexpr auto name = "mapContains"; }; -using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; +using FunctionMapContains = FunctionMapToArrayAdapter, MapToSubcolumnAdapter, NameMapContains>; struct NameMapFilter { static constexpr auto name = "mapFilter"; }; using FunctionMapFilter = FunctionMapToArrayAdapter, 
NameMapFilter>; diff --git a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference index e409e9ad89f..32bacfba5ea 100644 --- a/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference +++ b/tests/queries/0_stateless/01872_functions_to_subcolumns_analyzer.reference @@ -7,20 +7,22 @@ QUERY id: 0 isNotNull(n) UInt8 PROJECTION LIST id: 1, nodes: 3 - FUNCTION id: 2, function_name: isNull, function_type: ordinary, result_type: UInt8 + CONSTANT id: 2, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 3, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: id, result_type: UInt64, source_id: 6 + COLUMN id: 7, column_name: n.null, result_type: UInt8, source_id: 6 + FUNCTION id: 8, function_name: not, function_type: ordinary, result_type: UInt8 ARGUMENTS - LIST id: 3, nodes: 1 - COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 - COLUMN id: 6, column_name: n.null, result_type: UInt8, source_id: 5 - FUNCTION id: 7, function_name: not, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 8, nodes: 1 - COLUMN id: 9, column_name: n.null, result_type: UInt8, source_id: 5 + LIST id: 9, nodes: 1 + COLUMN id: 10, column_name: n.null, result_type: UInt8, source_id: 6 JOIN TREE - TABLE id: 5, alias: __table1, table_name: default.t_func_to_subcolumns + TABLE id: 6, alias: __table1, table_name: default.t_func_to_subcolumns SELECT - __table1.id IS NULL AS `isNull(id)`, + _CAST(0, \'UInt8\') AS `isNull(id)`, __table1.`n.null` AS `isNull(n)`, NOT __table1.`n.null` AS `isNotNull(n)` FROM default.t_func_to_subcolumns AS __table1 @@ -120,64 +122,58 @@ QUERY id: 0 LIST id: 1, nodes: 3 COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: n.null, result_type: UInt8, source_id: 3 - FUNCTION id: 5, 
function_name: isNull, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 1 - COLUMN id: 7, column_name: n, result_type: String, source_id: 8 + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 6, function_name: isNull, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: n, result_type: String, source_id: 9 JOIN TREE - JOIN id: 9, strictness: ALL, kind: FULL + JOIN id: 10, strictness: ALL, kind: FULL LEFT TABLE EXPRESSION TABLE id: 3, alias: __table1, table_name: default.t_func_to_subcolumns RIGHT TABLE EXPRESSION - UNION id: 8, alias: __table2, is_subquery: 1, union_mode: UNION_ALL + UNION id: 9, alias: __table2, is_subquery: 1, union_mode: UNION_ALL QUERIES - LIST id: 10, nodes: 2 - QUERY id: 11, alias: __table3 + LIST id: 11, nodes: 2 + QUERY id: 12, alias: __table3 PROJECTION COLUMNS id UInt8 - n String PROJECTION - LIST id: 12, nodes: 2 - CONSTANT id: 13, constant_value: UInt64_1, constant_value_type: UInt8 - CONSTANT id: 14, constant_value: \'qqq\', constant_value_type: String + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 JOIN TREE TABLE id: 15, alias: __table4, table_name: system.one QUERY id: 16, alias: __table5 PROJECTION COLUMNS id UInt8 - \'www\' String PROJECTION - LIST id: 17, nodes: 2 + LIST id: 17, nodes: 1 CONSTANT id: 18, constant_value: UInt64_3, constant_value_type: UInt8 - CONSTANT id: 19, constant_value: \'www\', constant_value_type: String JOIN TREE - TABLE id: 20, alias: __table6, table_name: system.one + TABLE id: 19, alias: __table6, table_name: system.one JOIN EXPRESSION - LIST id: 21, nodes: 1 - COLUMN id: 22, column_name: id, result_type: UInt64, source_id: 9 + LIST id: 20, nodes: 1 + COLUMN id: 21, column_name: id, result_type: UInt64, source_id: 10 EXPRESSION - LIST id: 23, nodes: 2 - COLUMN id: 24, column_name: id, result_type: UInt64, source_id: 3 - COLUMN 
id: 25, column_name: id, result_type: UInt8, source_id: 8 + LIST id: 22, nodes: 2 + COLUMN id: 23, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 24, column_name: id, result_type: UInt8, source_id: 9 SELECT __table1.id AS id, __table1.`n.null` AS `isNull(n)`, - __table2.n IS NULL AS `isNull(right.n)` + _CAST(0, \'UInt8\') AS `isNull(right.n)` FROM default.t_func_to_subcolumns AS __table1 ALL FULL OUTER JOIN ( ( - SELECT - 1 AS id, - \'qqq\' AS n + SELECT 1 AS id FROM system.one AS __table4 ) UNION ALL ( - SELECT - 3 AS id, - \'www\' AS `\'www\'` + SELECT 3 AS id FROM system.one AS __table6 ) ) AS __table2 USING (id) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference new file mode 100644 index 00000000000..be47c4ab571 --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -0,0 +1,3 @@ +1 +2 1 +3 0 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql new file mode 100644 index 00000000000..587288bbfdf --- /dev/null +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -0,0 +1,39 @@ +SET optimize_functions_to_subcolumns = 1; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_func_to_subcolumns_map_2; + +CREATE TABLE t_func_to_subcolumns_map_2 (id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_func_to_subcolumns_map_2 VALUES (1, map('aaa', 1, 'bbb', 2)) (2, map('ccc', 3)); + +SELECT sum(mapContains(m, toNullable('aaa'))) FROM t_func_to_subcolumns_map_2; + +DROP TABLE t_func_to_subcolumns_map_2; + +DROP TABLE IF EXISTS t_func_to_subcolumns_join; + +CREATE TABLE t_func_to_subcolumns_join (id UInt64, arr Array(UInt64), n Nullable(String), m Map(String, UInt64)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_join VALUES (1, 
[1, 2, 3], 'abc', map('foo', 1, 'bar', 2)) (2, [], NULL, map()); + +SET join_use_nulls = 1; + +SELECT + id, + right.n IS NULL +FROM t_func_to_subcolumns_join AS left +FULL OUTER JOIN +( + SELECT + 1 AS id, + 'qqq' AS n + UNION ALL + SELECT + 3 AS id, + 'www' +) AS right USING (id) +WHERE empty(arr); + +DROP TABLE t_func_to_subcolumns_join; From 0da1bb3f049f6bcc76dee0821c5dcc74f2dd55b2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 12 Jun 2024 22:40:54 +0000 Subject: [PATCH 085/273] fix typo --- src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 9cfd22cbef5..bc2028e1b43 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -283,7 +283,7 @@ public: if (has_join_use_nulls) { /// Do not optimize if we have JOIN with setting join_use_null. - /// It may change the behaviour if subcolumn can be coverted + /// It may change the behaviour if subcolumn can be converted /// to nullable while the original column cannot. 
return {}; } From a0a7f176126a8f9dc65d7ea8cb488e13feda6ccb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 17 Jun 2024 16:32:16 +0200 Subject: [PATCH 086/273] add tags to the new tests --- .../03008_deduplication_insert_several_blocks_nonreplicated.sh | 1 + .../03008_deduplication_insert_several_blocks_replicated.sh | 1 + ...08_deduplication_mv_generates_several_blocks_nonreplicated.sh | 1 + ...03008_deduplication_mv_generates_several_blocks_replicated.sh | 1 + ...3008_deduplication_several_mv_into_one_table_nonreplicated.sh | 1 + .../03008_deduplication_several_mv_into_one_table_replicated.sh | 1 + 6 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh index c758e2fb3de..49eb52b47fd 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh index 45b222b1fc4..53af06d4a6f 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh index 
50cf2a3bb75..7d4f5240cd1 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh index 2b094e0309e..109d1674f3a 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh index 33da54b90f1..fe3d610a758 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh index 290d1f794b2..9adee6d53d4 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -1,4 +1,5 @@ 
#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 90faa7b1eced87e0f2979e792d6569d1e2e005e8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 19 Jun 2024 21:44:37 +0100 Subject: [PATCH 087/273] impl --- src/Common/CgroupsMemoryUsageObserver.cpp | 178 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 14 +- 2 files changed, 102 insertions(+), 90 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..20db6a64a31 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,3 +1,5 @@ +#include +#include #include #if defined(OS_LINUX) @@ -28,8 +30,6 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_OPEN_FILE; extern const int FILE_DOESNT_EXIST; extern const int INCORRECT_DATA; } @@ -107,6 +107,75 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo namespace { +/// Format is +/// kernel 5 +/// rss 15 +/// [...] 
+uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t mem_usage = 0; + readIntText(mem_usage, buf); + return mem_usage; + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); +} + +struct CgroupsV1Reader : ICgroupsReader +{ + CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") + { + } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + current_buf.rewind(); + stat_buf.rewind(); + + uint64_t mem_usage = 0; + /// memory.current contains a single number + readIntText(mem_usage, current_buf); + mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + return mem_usage; + } + +private: + std::mutex mutex; + ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + /// Caveats: /// - All of the logic in this file assumes that the current process is the only process in the /// containing cgroup (or more precisely: the only process with significant memory consumption). @@ -117,7 +186,7 @@ namespace /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such /// systems existed only for a short transition period. 
-std::optional getCgroupsV2FileName() +std::optional getCgroupsV2Path() { if (!cgroupsV2Enabled()) return {}; @@ -132,29 +201,30 @@ std::optional getCgroupsV2FileName() /// level, try again at the parent level as memory settings are inherited. while (current_cgroup != default_cgroups_mount.parent_path()) { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; current_cgroup = current_cgroup.parent_path(); } return {}; } -std::optional getCgroupsV1FileName() +std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; if (!std::filesystem::exists(path)) return {}; - return {path}; + return {default_cgroups_mount / "memory"}; } -std::pair getCgroupsFileName() +std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2FileName(); + auto v2_file_name = getCgroupsV2Path(); if (v2_file_name.has_value()) return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1FileName(); + auto v1_file_name = getCgroupsV1Path(); if (v1_file_name.has_value()) return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; @@ -166,87 +236,25 @@ std::pair getCgroupsFil CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) : log(log_) { - std::tie(file_name, version) = getCgroupsFileName(); + const auto [cgroup_path, version] = getCgroupsPath(); - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? 
"v1" : "v2"); + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {})", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2"); } uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const { - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] 
- std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - + chassert(cgroup_reader); + const auto mem_usage = cgroup_reader->readMemoryUsage(); LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; } diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..62bbabb9e86 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. 
/// You can specify soft or hard memory limits: @@ -66,14 +74,10 @@ private: { public: explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); uint64_t readMemoryUsage() const; private: LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; + std::unique_ptr cgroup_reader; }; MemoryUsageFile memory_usage_file; From 1fa5212836219f89fe2ea8877d882daf0a928bce Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 17:49:51 +0100 Subject: [PATCH 088/273] remove MemoryUsageFile --- src/Common/CgroupsMemoryUsageObserver.cpp | 153 +++++++++++----------- src/Common/CgroupsMemoryUsageObserver.h | 15 +-- 2 files changed, 75 insertions(+), 93 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 20db6a64a31..23bfec4322b 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -24,84 +24,17 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; +extern const int FILE_DOESNT_EXIST; +extern const int INCORRECT_DATA; } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")) - , wait_time(wait_time_) - , memory_usage_file(log) -{ - LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count()); -} - -CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() -{ - stopThread(); -} - -void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) -{ - std::lock_guard limit_lock(limit_mutex); - - if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) - return; - - hard_limit = hard_limit_; - soft_limit = soft_limit_; - - on_hard_limit = [this, hard_limit_](bool up) - { - if (up) - { - 
LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); - - /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are - /// really zero. Trying to avoid OOM ... - MemoryTracker::setRSS(hard_limit_, 0); - } - else - { - LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); - } - }; - - on_soft_limit = [this, soft_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); - -#if USE_JEMALLOC - LOG_INFO(log, "Purging jemalloc arenas"); - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif - /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); - MemoryTracker::setRSS(memory_usage, 0); - - LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); - } - else - { - LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); - } - }; - - LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); -} - -void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) -{ - std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); - on_memory_amount_available_changed = on_memory_amount_available_changed_; } namespace @@ -233,8 +166,11 @@ std::pair getCgroupsPat } -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) { const auto [cgroup_path, version] = getCgroupsPath(); @@ -245,17 +181,73 @@ 
CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) LOG_INFO( log, - "Will read the current memory usage from '{}' (cgroups version: {})", + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", cgroup_path, - (version == CgroupsVersion::V1) ? "v1" : "v2"); + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const +CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() { - chassert(cgroup_reader); - const auto mem_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - return mem_usage; + stopThread(); +} + +void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) +{ + std::lock_guard limit_lock(limit_mutex); + + if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) + return; + + hard_limit = hard_limit_; + soft_limit = soft_limit_; + + on_hard_limit = [this, hard_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); + + /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are + /// really zero. Trying to avoid OOM ... + MemoryTracker::setRSS(hard_limit_, 0); + } + else + { + LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); + } + }; + + on_soft_limit = [this, soft_limit_](bool up) + { + if (up) + { + LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); + +# if USE_JEMALLOC + LOG_INFO(log, "Purging jemalloc arenas"); + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); +# endif + /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. 
+ uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + MemoryTracker::setRSS(memory_usage, 0); + + LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); + } + else + { + LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); + } + }; + + LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); +} + +void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) +{ + std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); + on_memory_amount_available_changed = on_memory_amount_available_changed_; } void CgroupsMemoryUsageObserver::startThread() @@ -309,7 +301,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 62bbabb9e86..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -69,23 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. 
- struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - std::unique_ptr cgroup_reader; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; From 0f0e1cee63a8e2047f2092d7db4e81c5a3b53572 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 20 Jun 2024 20:13:59 +0100 Subject: [PATCH 089/273] fix tidy --- src/Common/CgroupsMemoryUsageObserver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 23bfec4322b..c37e3c74db9 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,5 @@ +#include #include -#include #include #if defined(OS_LINUX) @@ -69,7 +69,7 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke struct CgroupsV1Reader : ICgroupsReader { - CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } uint64_t readMemoryUsage() override { @@ -85,7 +85,7 @@ private: struct CgroupsV2Reader : ICgroupsReader { - CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") { } From df8341c447ecf1775e27dcdd6ea09829d9e35880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 21 Jun 2024 18:30:45 +0200 Subject: [PATCH 090/273] Try to improve low number itoa --- base/base/itoa.cpp | 75 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git 
a/base/base/itoa.cpp b/base/base/itoa.cpp index e7250764704..0997daebbf6 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -3,6 +3,34 @@ #include #include +namespace +{ +ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value) +{ + *p = '0' + value; + return p + 1; +} + +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ +const char digits[201] = "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) +{ + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; +} + namespace jeaiii { /* @@ -84,43 +112,48 @@ template inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) { constexpr auto q = sizeof(T); - using U = cond>>; + using U = cond>>; // convert bool to int before test with unary + to silence warning if T happens to be bool U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i); - if (n < UInt32(1e2)) + if (n < U(1e2)) { - *reinterpret_cast(b) = digits.fd[n]; - return n < 10 ? b + 1 : b + 2; + return n < 10 ? 
outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) { - if (n < UInt32(1e4)) + if (sizeof(U) == 1 || n < U(1e4)) { auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 24]; - b -= n < UInt32(1e3); + if constexpr (sizeof(U) == 1) + b -= 1; + else + b -= n < U(1e3); auto f2 = (f0 & mask24) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 24]; return b + 4; } auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e5); + if constexpr (sizeof(U) == 2) + b -= 1; + else + b -= n < U(1e5); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; *reinterpret_cast(b + 4) = digits.dd[f4 >> 32]; return b + 6; } - if (n < UInt64(1ull << 32ull)) + if (sizeof(U) == 4 || n < UInt64(1ull << 32ull)) { - if (n < UInt32(1e8)) + if (n < U(1e8)) { auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; *reinterpret_cast(b) = digits.fd[f0 >> 32]; - b -= n < UInt32(1e7); + b -= n < U(1e7); auto f2 = (f0 & mask32) * 100; *reinterpret_cast(b + 2) = digits.dd[f2 >> 32]; auto f4 = (f2 & mask32) * 100; @@ -248,28 +281,6 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) } } -namespace -{ -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ -const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; -ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; 
const int max_multiple_of_hundred_blocks = 9; static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0); From 7e0ed1b02cb55b0ce5788c9f1cf7c69c163ad14b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Jun 2024 21:29:46 +0100 Subject: [PATCH 091/273] add test --- src/Common/CgroupsMemoryUsageObserver.cpp | 4 +-- .../test_memory_limit_observer/test.py | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index c37e3c74db9..33393a8b9c6 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -230,7 +230,7 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint # endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. 
Current memory usage is {}", ReadableSize(memory_usage)); @@ -302,7 +302,7 @@ void CgroupsMemoryUsageObserver::runThread() if (soft_limit > 0 && hard_limit > 0) { uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(memory_usage)); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index fe3acd9a0cf..369b9241f07 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -35,7 +35,7 @@ def get_latest_mem_limit(): ).strip() ) return mem_limit - except Exception as e: + except Exception: time.sleep(1) raise Exception("Cannot get memory limit") @@ -51,3 +51,26 @@ def test_observe_memory_limit(started_cluster): if new_max_mem > original_max_mem: return raise Exception("the memory limit does not increase as expected") + + +def test_memory_usage_doesnt_include_page_cache_size(started_cluster): + # populate page cache with 10GB of data + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + + observer_refresh_period = int( + node1.query( + "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'" + ).strip() + ) + time.sleep(observer_refresh_period + 1) + + max_mem_usage_from_cgroup = node1.query( + """ + SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem + FROM system.text_log + WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' + """ + ).strip() + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From 750c902671bb64f02c7bf6918779561a06711de6 Mon Sep 17 00:00:00 2001 From: 
robot-clickhouse Date: Fri, 21 Jun 2024 20:44:53 +0000 Subject: [PATCH 092/273] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 369b9241f07..d8c2a0e8ad7 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -73,4 +73,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From 556c7deeff11e404630948adf3bbd171170dd3eb Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 24 Jun 2024 01:19:50 +0000 Subject: [PATCH 093/273] add drop option in lightweight delete on table with projections --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 91 ++++++++++++++----- ...61_lightweight_delete_projection.reference | 2 + .../03161_lightweight_delete_projection.sql | 11 ++- 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3e83092a77..d85edcdae1f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -608,6 +608,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ + M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 69bc8c5d207..abad02f67c3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -105,6 +105,7 @@ static const std::maplockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), 
settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (table->supportsLightweightDelete()) + auto lightweightDelete = [&]() { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -105,17 +88,77 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); + }; + + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) + { + return lightweightDelete(); } else { - /// Currently just better exception for the case of a table with projection, - /// can act differently according to the setting. if (table->hasProjection()) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); + auto context = Context::createCopy(getContext()); + auto mode = Field(context->getSettingsRef().lightweight_mutation_projection_mode); + if (mode == "throw") + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. 
" + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == "drop") + { + std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); + + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); + + /// Drop projections first so that lightweight delete can be performed. + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " DROP PROJECTION IF EXISTS " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); + } + + return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..15832d4cdfa 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,2 @@ +8888 Alice 50 +1231 John 33 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index cd29fae8fd7..786f6a3cc34 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ 
-5,7 +5,8 @@ CREATE TABLE users ( uid Int16, name String, age Int16, - projection p1 (select count(), age group by age) + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); @@ -13,3 +14,11 @@ INSERT INTO users VALUES (6666, 'Ksenia', 48); INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +SELECT * FROM users; + +DROP TABLE users; From 636f2506f01e040c450d77bca29dfa8811f00575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jun 2024 11:35:29 +0200 Subject: [PATCH 094/273] Silence tidy --- base/base/itoa.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 0997daebbf6..c17a2bfd999 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -59,8 +59,8 @@ namespace jeaiii struct pair { char dd[2]; - constexpr pair(char c) : dd{c, '\0'} { } - constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } + constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor) + constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor) }; constexpr struct From e20136ce25f85d463f4e3a033ac0a2a1d97431e5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jun 2024 17:29:41 +0100 Subject: [PATCH 095/273] fix test --- tests/integration/test_memory_limit_observer/test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index d8c2a0e8ad7..f19e119c019 
100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -54,10 +54,13 @@ def test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): - # populate page cache with 10GB of data - node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] - ) + try: + # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + node1.exec_in_container( + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ) + except Exception: + pass observer_refresh_period = int( node1.query( From b1f87c578161578a03ee99ab87899cf4deb2c2ef Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 21:04:04 +0000 Subject: [PATCH 096/273] return back settings and fix build --- src/Backups/BackupIO_AzureBlobStorage.cpp | 17 +++-- src/Core/Settings.h | 6 ++ .../AzureBlobStorageCommon.cpp | 68 ++++++++++++------- .../AzureBlobStorage/AzureBlobStorageCommon.h | 9 ++- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- .../ObjectStorage/Azure/Configuration.cpp | 12 +++- 6 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 596c308ca8a..0ee0160a969 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -41,18 +41,17 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , blob_path(blob_path_) { auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); object_storage = std::make_unique( "BackupReaderAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), 
connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -122,8 +121,8 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const AzureBlobStorage::ConnectionParams & connection_params_, - bool allow_azure_native_copy, const String & blob_path_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, @@ -137,17 +136,17 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( connection_params.endpoint.container_already_exists = true; auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + object_storage = std::make_unique( "BackupWriterAzureBlobStorage", std::move(client_ptr), - AzureBlobStorage::getRequestSettings(context_->getSettingsRef()), + std::move(settings_ptr), connection_params.getContainer(), connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } void BackupWriterAzureBlobStorage::copyFileFromDisk( diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ebdb6860986..9d3fedc3063 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -112,16 +112,22 @@ class IColumn; M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, "When adaptive timeouts 
are enabled first two attempts are made with low receive and send timeout", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ + M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \ + M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. 
It may lead to slightly higher memory usage", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index c2e4bc0dc89..d9dfedadd48 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -197,7 +197,7 @@ void processURL(const String & url, const String & container_name, Endpoint & en return; } - size_t pos = url.find('?'); + auto pos = url.find('?'); /// If conneciton_url does not have '?', then its not SAS if (pos == std::string::npos) @@ -273,42 +273,60 @@ BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_di std::unique_ptr getRequestSettings(const Settings & query_settings) { - auto settings_ptr = std::make_unique(); + auto settings = std::make_unique(); - settings_ptr->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = query_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(query_settings.azure_list_object_keys_size); + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + settings->max_upload_part_size = query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = 
query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; - settings_ptr->sdk_max_retries = query_settings.azure_sdk_max_retries; - settings_ptr->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; - settings_ptr->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + return settings; +} - return settings_ptr; +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; } std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = 
config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - settings->sdk_max_retries = config.getUInt(config_prefix + ".max_tries", 10); - settings->sdk_retry_initial_backoff_ms = config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10); - settings->sdk_retry_max_backoff_ms = 
config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + 
".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); if (config.has(config_prefix + ".curl_ip_resolve")) { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 5f9f280ad4a..19ba48ea225 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -32,22 +32,23 @@ struct RequestSettings RequestSettings() = default; size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; + size_t min_bytes_for_seek = 1024 * 1024; size_t max_single_read_retries = 3; size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; + size_t list_object_keys_size = 1000; size_t min_upload_part_size = 16 * 1024 * 1024; size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; size_t strict_upload_part_size = 0; size_t upload_part_size_multiply_factor = 2; size_t upload_part_size_multiply_parts_count_threshold = 500; size_t sdk_max_retries = 10; size_t sdk_retry_initial_backoff_ms = 10; size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy 
= false; using CurlOptions = Azure::Core::Http::CurlTransportOptions; CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; @@ -125,7 +126,9 @@ std::unique_ptr getContainerClient(const ConnectionParams & par BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); } diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 3f32b4b410e..092277aca50 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -332,8 +332,9 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) ObjectStorageType::Azure, config, config_prefix, name, AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), params.endpoint.prefix.empty() ? 
params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index e4b3d61f659..595d4da3609 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -73,10 +73,16 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } static AzureBlobStorage::ConnectionParams getConnectionParams( From 51f300356e0c0df9c7d001ffe0b7967c7d9f438e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jun 2024 23:57:33 +0000 Subject: [PATCH 097/273] fix style --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9d3fedc3063..067e46226ea 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -126,8 +126,8 @@ class IColumn; M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, 
instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff beetween retries in azure sdk", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff beetween retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimal backoff between retries in azure sdk", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximal backoff between retries in azure sdk", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ From 615cd96c6e6893973b14cc0190e510894b747b22 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 25 Jun 2024 12:48:43 +0100 Subject: [PATCH 098/273] fix test --- tests/integration/test_memory_limit_observer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index f19e119c019..2840c830396 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -55,9 +55,9 @@ def test_observe_memory_limit(started_cluster): def test_memory_usage_doesnt_include_page_cache_size(started_cluster): try: - # populate page cache with 10GB of data; it might be killed by OOM killer but it is fine + # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine node1.exec_in_container( - ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=10K"] + ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"] ) except Exception: pass @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2**30 + assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 From d9f681b39d22a99f8ace10f39ed308621b27a774 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 26 Jun 2024 10:44:17 +0200 Subject: [PATCH 099/273] Disable stacktrace collection in GWPAsan by default --- src/Common/GWPAsan.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 488f8e2c5dc..ea376609ff4 100644 --- 
a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,9 +57,12 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 50000; + opts.SampleRate = 5000; + + const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) + if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") + opts.Backtrace = getBackTrace; - opts.Backtrace = getBackTrace; GuardedAlloc.init(opts); return true; From c30ecee10c8cba5d245916d458ddd60345ea359c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jun 2024 12:40:55 +0000 Subject: [PATCH 100/273] remove unused code --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 2 +- src/Storages/ObjectStorage/Azure/Configuration.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index baad3bdf223..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -1,5 +1,4 @@ #pragma once -#include "Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h" #include "config.h" #if USE_AZURE_BLOB_STORAGE @@ -9,6 +8,7 @@ #include #include #include +#include namespace Poco { diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 272d155e337..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -57,9 +57,6 @@ protected: std::string blob_path; std::vector blobs_paths; AzureBlobStorage::ConnectionParams connection_params; - - // AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - // AzureObjectStorage::SettingsPtr 
createSettings(ContextPtr local_context); }; } From b694abd5c9910bf5199141d4afef658a7f35a0c6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jun 2024 14:18:14 +0000 Subject: [PATCH 101/273] do not optimize with group_by_use_nulls --- .../Passes/FunctionToSubcolumnsPass.cpp | 20 +++++++++++++------ ...71_function_to_subcolumns_fuzzer.reference | 6 ++++++ .../03171_function_to_subcolumns_fuzzer.sql | 10 ++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index bc2028e1b43..90051779a26 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -273,18 +273,26 @@ public: if (const auto * join_node = node->as()) { - has_join_use_nulls |= getContext()->getSettingsRef().join_use_nulls; + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls; + return; + } + + if (const auto * query_node = node->as()) + { + if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets()) + can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls; return; } } std::unordered_set getIdentifiersToOptimize() const { - if (has_join_use_nulls) + if (can_wrap_result_columns_with_nullable) { /// Do not optimize if we have JOIN with setting join_use_null. + /// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls. /// It may change the behaviour if subcolumn can be converted - /// to nullable while the original column cannot. + /// to Nullable while the original column cannot (e.g. for Array type). 
return {}; } @@ -323,7 +331,7 @@ private: std::unordered_map optimized_identifiers_count; NameSet processed_tables; - bool has_join_use_nulls = false; + bool can_wrap_result_columns_with_nullable = false; void enterImpl(const TableNode & table_node) { @@ -342,9 +350,9 @@ private: const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata; const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey(); - add_key_columns(primary_key_columns); - const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey(); + + add_key_columns(primary_key_columns); add_key_columns(partition_key_columns); for (const auto & index : metadata_snapshot->getSecondaryIndices()) diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference index be47c4ab571..1fc6683620c 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.reference @@ -1,3 +1,9 @@ 1 2 1 3 0 +0 450 +1 460 +2 470 +3 480 +4 490 +\N 4950 diff --git a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql index 587288bbfdf..f10019a78dd 100644 --- a/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql +++ b/tests/queries/0_stateless/03171_function_to_subcolumns_fuzzer.sql @@ -37,3 +37,13 @@ FULL OUTER JOIN WHERE empty(arr); DROP TABLE t_func_to_subcolumns_join; + +DROP TABLE IF EXISTS t_func_to_subcolumns_use_nulls; + +CREATE TABLE t_func_to_subcolumns_use_nulls (arr Array(UInt64), v UInt64) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_func_to_subcolumns_use_nulls SELECT range(number % 10), number FROM numbers(100); + +SELECT length(arr) AS n, sum(v) FROM t_func_to_subcolumns_use_nulls GROUP BY n WITH ROLLUP HAVING n <= 4 OR isNull(n) ORDER BY n SETTINGS 
group_by_use_nulls = 1; + +DROP TABLE t_func_to_subcolumns_use_nulls; From b06eac085bddc3f0ed6a2f5d2ac0a4ddcdd8259c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 20:01:17 +0200 Subject: [PATCH 102/273] work with review --- src/Common/CollectionOfDerived.h | 21 +- src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 8 +- src/Interpreters/InterpreterCreateQuery.cpp | 6 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/SystemLog.cpp | 8 +- .../DeduplicationTokenTransforms.cpp | 56 +- .../Transforms/DeduplicationTokenTransforms.h | 22 +- .../Transforms/buildPushingToViewsChain.cpp | 22 +- src/Storages/Distributed/DistributedSink.cpp | 16 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/HDFS/StorageHDFS.cpp | 1207 ----------------- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/MergeTreeSink.cpp | 24 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 6 - src/Storages/NATS/StorageNATS.cpp | 8 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/WindowView/StorageWindowView.cpp | 17 +- 24 files changed, 220 insertions(+), 1284 deletions(-) delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 60a91e593f9..97c0c3fbc06 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -2,6 +2,8 @@ #include +#include + #include #include #include @@ -12,6 +14,16 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* This is a collections of objects derived from ItemBase. +* Collection contains no more than one instance for each derived type. 
+* The derived type is used to access the instance. +*/ + template class CollectionOfDerivedItems { @@ -67,15 +79,16 @@ public: { Self result; result.records.reserve(records.size()); - for (const auto & rec: records) + for (const auto & rec : records) result.records.emplace_back(rec.type_idx, rec.ptr->clone()); return result; } void append(Self && other) { + auto middle_idx = records.size(); std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::sort(records.begin(), records.end()); + std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); chassert(isUniqTypes()); } @@ -143,7 +156,9 @@ private: return; } - chassert(it->type_idx != type_idx); + if (it->type_idx == type_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); + records.emplace(it, type_idx, item); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a272456470a..c400873a47c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -632,7 +632,6 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Deprecated.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -948,6 +947,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 94c024ba786..dd1166a9228 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns, false, false, false); + InterpreterInsertQuery interpreter( + query, + query_context, + query_context->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto table = interpreter.getTable(insert_query); auto sample_block = 
InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ef222a6842f..dbbdb546260 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1750,9 +1750,9 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) insert, getContext(), getContext()->getSettingsRef().insert_allow_materialized_columns, - false, - false, - false).execute(); + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index b837490dad9..8392f0541f1 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -524,7 +524,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext(), false, false, false, false); + InterpreterInsertQuery insert( + ast.getExplainedQuery(), + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 8f97b6ea263..8d4882372ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -537,7 +537,13 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter(query_ptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + query_ptr, + insert_context, + /* allow_materialized */ false, + 
/* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 0701e958877..23e32415f6a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -26,10 +26,16 @@ void RestoreChunkInfosTransform::transform(Chunk & chunk) namespace DeduplicationToken { -String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const +String TokenInfo::getToken() const { - chassert(stage == VIEW_ID || !enable_assert); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + return getTokenImpl(); +} + +String TokenInfo::getTokenImpl() const +{ String result; result.reserve(getTotalSize()); @@ -43,13 +49,20 @@ String DB::DeduplicationToken::TokenInfo::getToken(bool enable_assert) const return result; } -void DB::DeduplicationToken::TokenInfo::addPieceToInitialToken(String part) +String TokenInfo::debugToken() const { - chassert(stage == INITIAL); + return getTokenImpl(); +} + + +void TokenInfo::addPieceToInitialToken(String part) +{ + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(std::move(part)); } -void DB::DeduplicationToken::TokenInfo::closeInitialToken() +void TokenInfo::closeInitialToken() { chassert(stage == INITIAL); stage = VIEW_ID; @@ -57,29 +70,37 @@ void DB::DeduplicationToken::TokenInfo::closeInitialToken() void TokenInfo::setUserToken(const String & token) { - chassert(stage == INITIAL); + if (stage != INITIAL) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("user-token-{}", token)); 
stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t sbn) +void TokenInfo::setSourceBlockNumber(size_t block_number) { - chassert(stage == SOURCE_BLOCK_NUMBER); - addTokenPart(fmt::format("source-number-{}", sbn)); + if (stage != SOURCE_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("source-number-{}", block_number)); stage = VIEW_ID; } void TokenInfo::setViewID(const String & id) { - chassert(stage == VIEW_ID); + if (stage != VIEW_ID) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(fmt::format("view-id-{}", id)); stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t mvbn) +void TokenInfo::setViewBlockNumber(size_t block_number) { - chassert(stage == VIEW_BLOCK_NUMBER); - addTokenPart(fmt::format("view-block-{}", mvbn)); + if (stage != VIEW_BLOCK_NUMBER) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-block-{}", block_number)); stage = VIEW_ID; } @@ -91,8 +112,7 @@ void TokenInfo::reset() void TokenInfo::addTokenPart(String part) { - if (!part.empty()) - parts.push_back(std::move(part)); + parts.push_back(std::move(part)); } size_t TokenInfo::getTotalSize() const @@ -107,6 +127,7 @@ size_t TokenInfo::getTotalSize() const return size + parts.size() - 1; } +#ifdef ABORT_ON_LOGICAL_ERROR void CheckTokenTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); @@ -116,12 +137,13 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!must_be_present) { - LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, no token required, token {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); return; } - 
LOG_DEBUG(getLogger("CheckInsertDeduplicationTokenTransform"), "{}, token: {}", debug, token_info->getToken(false)); + LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); } +#endif String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) { diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 27bb21dfad1..ebbbb0f7590 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -4,6 +4,7 @@ #include #include +#include "Common/Logger.h" namespace DB @@ -33,7 +34,8 @@ namespace DeduplicationToken TokenInfo() = default; TokenInfo(const TokenInfo & other) = default; - String getToken(bool enable_assert = true) const; + String getToken() const; + String debugToken() const; bool empty() const { return parts.empty(); } bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } @@ -41,15 +43,25 @@ namespace DeduplicationToken void addPieceToInitialToken(String part); void closeInitialToken(); void setUserToken(const String & token); - void setSourceBlockNumber(size_t sbn); + void setSourceBlockNumber(size_t block_number); void setViewID(const String & id); - void setViewBlockNumber(size_t mvbn); + void setViewBlockNumber(size_t block_number); void reset(); private: + String getTokenImpl() const; + void addTokenPart(String part); size_t getTotalSize() const; + /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. + * Firstly token has expand with information about the souce. + * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. + * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. 
+ * After that token is considered as prepared for usage, hovewer it could be expanded with following details: + * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. + * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. + */ enum BuildingStage { INITIAL, @@ -63,6 +75,8 @@ namespace DeduplicationToken }; +#ifdef ABORT_ON_LOGICAL_ERROR + /// use that class only with debug builds in CI for introspection class CheckTokenTransform : public ISimpleTransform { public: @@ -79,8 +93,10 @@ namespace DeduplicationToken private: String debug; + LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); bool must_be_present = false; }; +#endif class AddTokenInfoTransform : public ISimpleTransform diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index aba28391879..713ab25600f 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -27,6 +27,7 @@ #include #include #include +#include "base/defines.h" #include #include @@ -225,7 +226,6 @@ std::optional generateViewChain( if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); - insert_context->setSetting("insert_deduplication_token", Field{""}); } // Processing of blocks for MVs is done block by block, and there will @@ -333,7 +333,13 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + nullptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = 
!materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -350,7 +356,9 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -394,7 +402,9 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); @@ -402,7 +412,9 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); +#ifdef ABORT_ON_LOGICAL_ERROR out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); +#endif } return out; @@ -459,8 +471,6 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - LOG_DEBUG(&Poco::Logger::get("PushingToViews"), "dependent view: {}.{}", view_id.database_name, view_id.table_name); - try { auto out = generateViewChain( @@ -569,7 +579,7 @@ Chain buildPushingToViewsChain( } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) @@ -586,7 +596,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, 
ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) { const auto & context = view.context; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 2e3096683d0..8791668cd89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -420,7 +420,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + copy_query_ast, + job.local_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +721,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized, false, false, false); + InterpreterInsertQuery interp( + query_ast, + context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b86845d48e0..0f9bd8b6ff9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -743,10 +743,11 @@ bool StorageFileLog::streamToViews() InterpreterInsertQuery interpreter( insert, new_context, - false, - true, - true, - false); + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); + auto block_io = interpreter.execute(); /// Each stream 
responsible for closing it's files and store meta diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index 1ca7c1f71d0..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1207 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. 
- */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
- throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
- std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if 
(columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. 
- if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = 
uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = 
std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. 
- format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto 
answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -HDFSSource::~HDFSSource() = default; - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; 
- StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? 
tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. 
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, 
get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk & chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -namespace -{ - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const String & uri, size_t sequence_number) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - if (context->getSettingsRef().hdfs_truncate_on_insert || hdfsExists(fs.get(), path_from_uri.c_str())) - return std::nullopt; - - if (context->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uri.find_first_of('.', uri.find_last_of('/')); - String new_uri; - do - { - new_uri = uri.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : uri.substr(pos)); - ++sequence_number; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - - return new_uri; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. 
If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } -} - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - if (auto new_path = checkAndGetNewFileOnInsertIfNeeded(context, path, 1)) - path = *new_path; - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t 
num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_)); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - 
iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->getVirtualsList(), context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.front(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - String path = current_uri.substr(current_uri.find('/', current_uri.find("//") + 2)); - if (PartitionedSink::replaceWildcards(path, "").find_first_of("*?{") != std::string::npos) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - if (auto new_uri = checkAndGetNewFileOnInsertIfNeeded(context_, uris.front(), uris.size())) - { - uris.push_back(*new_uri); - current_uri = *new_uri; - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments 
& args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index f92c6ae67c9..809401bb279 100644 
--- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 57d75b969c3..ff5214a5e51 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(); - if (storage.getDeduplicationLog()) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {}, user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - - if (token_info->tokenInitialized()) - block_dedup_token = token_info->getToken(); - } + String block_dedup_token; + if (token_info->tokenInitialized()) + block_dedup_token = token_info->getToken(); for (auto & current_block : 
part_blocks) { @@ -161,7 +152,6 @@ void MergeTreeSink::consume(Chunk & chunk) partitions = DelayedPartitions{}; } - /// TODO block_dedup_token partitions.emplace_back(MergeTreeSink::DelayedChunk::Partition { .temp_part = std::move(temp_part), diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index cf3af59118e..b15b80864e5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,12 +304,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (!token_info->tokenInitialized() && !context->getSettingsRef().insert_deduplication_token.value.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo has to be initialized with user token for table: {} user dedup token {}", - storage.getStorageID().getNameForLogs(), - context->getSettingsRef().insert_deduplication_token.value); - if (token_info->tokenInitialized()) block_dedup_token = token_info->getToken(); } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 9c6d70f2c5b..8f0e2d76473 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,13 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + nats_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp 
b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 57c8d24ccc2..44479bd01e2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,13 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ true, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 4a5a621aa43..f632e553a0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,13 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context, false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5bf5ab9b2f5..f3d2aff68c8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,13 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert 
into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true, false); + InterpreterInsertQuery interpreter( + insert, + rabbitmq_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b9aa7881bdd..d1607843364 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -477,7 +477,13 @@ bool StorageS3Queue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true, false); + InterpreterInsertQuery interpreter( + insert, + s3queue_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 10eecd63e3c..b064fba223a 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1020,7 +1020,13 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter(insert, insert_context, allow_materialized, false, false, false); + InterpreterInsertQuery interpreter( + insert, + insert_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); PushingPipelineExecutor 
executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1c129e34170..67586985ce8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,7 +1050,13 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context, false, false, false, false); + InterpreterInsertQuery interpreter( + new_query, + query_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 65aa06f8506..b1dd5f8a114 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -690,7 +690,13 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext(), false, false, false, false); + InterpreterInsertQuery interpreter( + insert, + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1548,11 +1554,12 @@ void StorageWindowView::writeIntoWindowView( return std::make_shared(stream_header); }); +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer tmp table before squasing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); }); - +#endif 
builder.addSimpleTransform([&](const Block & current_header) { @@ -1593,10 +1600,12 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); }); +#endif auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( @@ -1617,10 +1626,12 @@ void StorageWindowView::writeIntoWindowView( builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } +#ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared("StorageWindowView: Before out", true, stream_header); }); +#endif builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) From a38f8d6c459ed597ce60de0108ad79dac6044b37 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jun 2024 21:03:53 +0200 Subject: [PATCH 103/273] rework TokenInfo::BuildingStage --- .../DeduplicationTokenTransforms.cpp | 61 +++++++++++-------- .../Transforms/DeduplicationTokenTransforms.h | 54 ++++++++++------ src/Storages/MergeTree/MergeTreeSink.cpp | 10 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 10 +-- 4 files changed, 82 insertions(+), 53 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 23e32415f6a..10c21249ebc 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -28,8 +28,8 @@ namespace DeduplicationToken String TokenInfo::getToken() const { - if (stage != VIEW_ID) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + if (!isDefined()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, 
stage {}, token {}", stage, debugToken()); return getTokenImpl(); } @@ -54,59 +54,70 @@ String TokenInfo::debugToken() const return getTokenImpl(); } - -void TokenInfo::addPieceToInitialToken(String part) +void TokenInfo::addChunkHash(String part) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + addTokenPart(std::move(part)); } -void TokenInfo::closeInitialToken() +void TokenInfo::defineSourceWithChunkHashes() { - chassert(stage == INITIAL); - stage = VIEW_ID; + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + stage = DEFINED; } void TokenInfo::setUserToken(const String & token) { - if (stage != INITIAL) + if (stage == UNDEFINED) + stage = DEFINE_SOURCE_USER_TOKEN; + + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("user-token-{}", token)); - stage = SOURCE_BLOCK_NUMBER; } -void TokenInfo::setSourceBlockNumber(size_t block_number) +void TokenInfo::defineSourceWithUserToken(size_t block_number) { - if (stage != SOURCE_BLOCK_NUMBER) + if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("source-number-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::setViewID(const String & id) { - if (stage != VIEW_ID) + if (stage == DEFINED) + stage = DEFINE_VIEW; + + if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-id-{}", id)); - stage = VIEW_BLOCK_NUMBER; } -void TokenInfo::setViewBlockNumber(size_t block_number) 
+void TokenInfo::defineViewID(size_t block_number) { - if (stage != VIEW_BLOCK_NUMBER) + if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); addTokenPart(fmt::format("view-block-{}", block_number)); - stage = VIEW_ID; + + stage = DEFINED; } void TokenInfo::reset() { - stage = INITIAL; + stage = UNDEFINED; parts.clear(); } @@ -145,7 +156,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String SetInitialTokenTransform::getInitialToken(const Chunk & chunk) +String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,11 +176,11 @@ void SetInitialTokenTransform::transform(Chunk & chunk) ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) return; - token_info->addPieceToInitialToken(getInitialToken(chunk)); - token_info->closeInitialToken(); + token_info->addChunkHash(getChunkHash(chunk)); + token_info->defineSourceWithChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -189,7 +200,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceBlockNumber(block_number++); + token_info->defineSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -209,7 +220,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); + token_info->defineViewID(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h 
b/src/Processors/Transforms/DeduplicationTokenTransforms.h index ebbbb0f7590..416d4bb5f62 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -38,14 +38,18 @@ namespace DeduplicationToken String debugToken() const; bool empty() const { return parts.empty(); } - bool tokenInitialized() const { return stage != INITIAL && stage != SOURCE_BLOCK_NUMBER; } - void addPieceToInitialToken(String part); - void closeInitialToken(); + bool isDefined() const { return stage == DEFINED; } + + void addChunkHash(String part); + void defineSourceWithChunkHashes(); + void setUserToken(const String & token); - void setSourceBlockNumber(size_t block_number); + void defineSourceWithUserToken(size_t block_number); + void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); + void defineViewID(size_t block_number); + void reset(); private: @@ -54,23 +58,37 @@ namespace DeduplicationToken void addTokenPart(String part); size_t getTotalSize() const; - /* Token has to be prepared in a particular order. BuildingStage ensure that token is expanded according the foloving order. - * Firstly token has expand with information about the souce. - * INITIAL -- in that stage token is expanded with several hash sums or with the user defined deduplication token. - * SOURCE_BLOCK_NUMBER -- when token is expand with user defined deduplication token, after token has to be expanded with source block number. - * After that token is considered as prepared for usage, hovewer it could be expanded with following details: - * VIEW_ID -- in that stage token is expanded with view id, token could not be used until nex stage is passed. - * VIEW_BLOCK_NUMBER - in that stage token is expanded with view block number. + /* Token has to be prepared in a particular order. + * BuildingStage ensures that token is expanded according the foloving order. + * Firstly token is expanded with information about the source. 
+ * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken + * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken + * + * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * + * transition // method + * DEFINED -> DEFINE_VIEW // setViewID + * DEFINE_VIEW -> DEFINED // defineViewID */ + enum BuildingStage { - INITIAL, - SOURCE_BLOCK_NUMBER, - VIEW_ID, - VIEW_BLOCK_NUMBER, + UNDEFINED, + DEFINE_SOURCE_WITH_HASHES, + DEFINE_SOURCE_USER_TOKEN, + DEFINE_VIEW, + DEFINED, }; - BuildingStage stage = INITIAL; + BuildingStage stage = UNDEFINED; std::vector parts; }; @@ -128,7 +146,7 @@ namespace DeduplicationToken void transform(Chunk & chunk) override; - static String getInitialToken(const Chunk & chunk); + static String getChunkHash(const Chunk & chunk); }; class ResetTokenTransform : public ISimpleTransform diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7cb89fa7239..532fa718efd 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -92,7 +92,7 @@ void MergeTreeSink::consume(Chunk & chunk) storage.getStorageID().getNameForLogs()); String block_dedup_token; - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); for (auto & current_block : part_blocks) @@ -119,10 +119,10 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if 
(!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -161,9 +161,9 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b15b80864e5..228b5c596ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -304,7 +304,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); - if (token_info->tokenInitialized()) + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); } @@ -371,10 +371,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addPieceToInitialToken(temp_part.part->getPartBlockIDHash()); + token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); } } @@ -421,9 +421,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->tokenInitialized()) + if (!token_info->isDefined()) { - token_info->closeInitialToken(); + token_info->defineSourceWithChunkHashes(); } finishDelayedChunk(zookeeper); From 8efa045a97517bbcf28b80c178e9df84d92973b2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:09:55 +0200 
Subject: [PATCH 104/273] fix resolving conflicts with squashing --- src/Interpreters/Squashing.cpp | 86 ++++++++++--------- src/Interpreters/Squashing.h | 29 ++++--- .../Transforms/ApplySquashingTransform.h | 16 +--- .../Transforms/PlanSquashingTransform.cpp | 21 ++--- .../Transforms/PlanSquashingTransform.h | 2 +- .../Transforms/SquashingTransform.cpp | 22 ++--- .../Transforms/SquashingTransform.h | 1 - src/Server/TCPHandler.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 9 files changed, 85 insertions(+), 96 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index bf363a21400..dbf16452287 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,22 +10,24 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(header_) { } Chunk Squashing::flush() { - decltype(chunks_to_merge_vec) to_convert; - to_convert.swap(chunks_to_merge_vec); - return convertToChunk(std::move(to_convert)); + if (!accumulated) + return {}; + + return convertToChunk(accumulated.extract()); } Chunk Squashing::squash(Chunk && input_chunk) { - if (input_chunk.getChunkInfos().empty()) + if (!input_chunk) return Chunk(); auto squash_info = input_chunk.getChunkInfos().extract(); @@ -42,48 +44,39 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + if (isEnoughSize(input_chunk)) { /// If no accumulated data, return just read block. 
- if (chunks_to_merge_vec.empty()) + if (!accumulated) { - chunks_to_merge_vec.push_back(std::move(input_chunk)); - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - return res_chunk; + accumulated.add(std::move(input_chunk)); + return convertToChunk(accumulated.extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + accumulated.add(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - changeCurrentSize(0, 0); - chunks_to_merge_vec.clear(); - return res_chunk; + return convertToChunk(accumulated.extract()); } + return {}; } @@ -95,7 +88,8 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks 
= std::move(chunks); - auto aggr_chunk = Chunk(); + // It is imortant that chunk is not empty, it has to have colums even if they are emty + auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); return aggr_chunk; @@ -136,22 +130,34 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl return accumulated_chunk; } -void Squashing::expandCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows += rows; - accumulated_size.bytes += bytes; -} - -void Squashing::changeCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows = rows; - accumulated_size.bytes = bytes; -} - bool Squashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +bool Squashing::isEnoughSize() const +{ + return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); +}; + +bool Squashing::isEnoughSize(const Chunk & chunk) const +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + +void Squashing::CurrentSize::add(Chunk && chunk) +{ + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + chunks.push_back(std::move(chunk)); +} + +std::vector Squashing::CurrentSize::extract() +{ + auto result = std::move(chunks); + *this = {}; + return result; +} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 08535119241..830d621b43b 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -38,36 +38,39 @@ public: class Squashing { public: - explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); static Chunk squash(Chunk && input_chunk); Chunk flush(); - bool isDataLeft() - { - return 
!chunks_to_merge_vec.empty(); - } - private: - struct CurrentSize + class CurrentSize { + std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; + + public: + explicit operator bool () const { return !chunks.empty(); } + size_t getRows() const { return rows; } + size_t getBytes() const { return bytes; } + void add(Chunk && chunk); + std::vector extract(); }; - std::vector chunks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; + const size_t min_block_size_rows; + const size_t min_block_size_bytes; + const Block header; - CurrentSize accumulated_size; + CurrentSize accumulated; static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); - void expandCurrentSize(size_t rows, size_t bytes); - void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize() const; bool isEnoughSize(size_t rows, size_t bytes) const; + bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 51bc69f6b9b..94b890198d4 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -27,18 +27,12 @@ public: } ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } } protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) - cur_chunk.setColumns(res_chunk.getColumns(), 
res_chunk.getNumRows()); + cur_chunk = DB::Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override @@ -48,16 +42,10 @@ protected: res.is_done = true; return res; } - void onFinish() override - { - auto chunk = DB::Squashing::squash({}); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); - } private: Squashing squashing; Chunk cur_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index d1d3fcd3205..6a8cd10027e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,4 +1,6 @@ #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include namespace DB @@ -10,22 +12,22 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header) - , squashing(min_block_size_rows, min_block_size_bytes) + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header_, header_) + , squashing(header_, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - Chunk result = squashing.add(std::move(chunk)); - if (!result.getChunkInfos().empty()) - squashed_chunk = std::move(result); + LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); + + squashed_chunk = squashing.add(std::move(chunk)); } Chunk PlanSquashingTransform::generate() { - if (squashed_chunk.getChunkInfos().empty()) + if (!squashed_chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -35,12 +37,11 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return !squashed_chunk.getChunkInfos().empty(); + return bool(squashed_chunk); } Chunk 
PlanSquashingTransform::getRemaining() { - Chunk current_chunk = squashing.flush(); - return current_chunk; + return squashing.flush(); } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 4ad2ec2d089..1f83e62284d 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1e3798e89c8..e457a262681 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,15 +12,13 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); + cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -33,10 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = squashing.flush(); - if (!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); - 
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + finish_chunk = DB::Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -49,6 +44,7 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); + if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -59,7 +55,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -67,18 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (!planned_chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); + chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = squashing.flush(); - if (!chunk.getChunkInfos().empty()) - chunk = DB::Squashing::squash(std::move(chunk)); + chunk = DB::Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 9d1591d0bcd..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -26,7 +26,6 @@ protected: private: Squashing squashing; Chunk cur_chunk; - Chunk::ChunkInfoCollection cur_chunkinfos; Chunk finish_chunk; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index bc1487acefa..22d2c4eeebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -884,7 +884,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = 
AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); Block header = state.input_header; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a8334f22272..0beeca0d542 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. - projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; From bc31f851273e719658ef70371f6b684f4e1c0e69 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 00:29:11 +0200 Subject: [PATCH 105/273] fix style --- src/Interpreters/Squashing.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index dbf16452287..971f0102148 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -88,7 +88,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have colums even if they are emty + // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp 
index a3c5a7ed3ed..6ce6f5e454e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolums in unknown required columns. + /// Check for dynamic subcolumns in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) From d485606e9420eec3e617e5ec49a1c1ac16478a85 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 02:09:45 +0200 Subject: [PATCH 106/273] fix header in async insert and projections --- src/Interpreters/Squashing.cpp | 22 ++++++++++++---------- src/Interpreters/Squashing.h | 5 ++++- src/Server/TCPHandler.cpp | 22 ++++++++-------------- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++----------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 971f0102148..2b808e25fbb 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "base/defines.h" namespace DB @@ -22,7 +23,9 @@ Chunk Squashing::flush() if (!accumulated) return {}; - return convertToChunk(accumulated.extract()); + auto result = convertToChunk(accumulated.extract()); + chassert(result); + return result; } Chunk Squashing::squash(Chunk && input_chunk) @@ -73,9 +76,7 @@ Chunk Squashing::add(Chunk && input_chunk) /// If accumulated data is big enough, we send it if (isEnoughSize()) - { return convertToChunk(accumulated.extract()); - } return {}; } @@ -91,7 +92,7 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const // It is imortant that chunk is not empty, it has to have columns even if they are empty auto aggr_chunk = Chunk(header.getColumns(), 0); aggr_chunk.getChunkInfos().add(std::move(info)); - + chassert(aggr_chunk); return aggr_chunk; } @@ -118,16 +119,17 @@ 
Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } - Chunk accumulated_chunk; - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - accumulated_chunk.setChunkInfos(infos); - accumulated_chunk.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); - return accumulated_chunk; + Chunk result; + result.setColumns(std::move(mutable_columns), rows); + result.setChunkInfos(infos); + result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); + + chassert(result); + return result; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 830d621b43b..64a9768a71f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -45,6 +45,9 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); + void setHeader(Block header_) { header = std::move(header_); } + const Block & getHeader() const { return header; } + private: class CurrentSize { @@ -62,7 +65,7 @@ private: const size_t min_block_size_rows; const size_t min_block_size_bytes; - const Block header; + Block header; CurrentSize accumulated; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 22d2c4eeebc..fd226db5bb1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -886,16 +886,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro startInsertQuery(); Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); - Block header = state.input_header; - while (readDataNext()) { - header = state.block_for_insert.cloneEmpty(); - auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if 
(!planned_chunk.getChunkInfos().empty()) + squashing.setHeader(state.block_for_insert.cloneEmpty()); + auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + if (result_chunk) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -904,16 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - if (planned_chunk.getChunkInfos().empty()) + Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + if (!result_chunk) { - return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(header), query_context); + return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); } - Chunk result_chunk; - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = header.cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0beeca0d542..5da36b6ee3b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1315,14 +1315,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - projection_header = block_to_squash.cloneEmpty(); + 
projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - if (!planned_chunk.getChunkInfos().empty()) + Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1343,12 +1341,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto planned_chunk = projection_squash_plan.flush(); - if (!planned_chunk.getChunkInfos().empty()) + auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_header.cloneWithColumns(projection_chunk.detachColumns()); + auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From c8bca3135de71d4adafe74c415adfc14683ad7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20Dug=C3=A9=20de=20Bernonville?= Date: Wed, 26 Jun 2024 14:51:21 +0200 Subject: [PATCH 107/273] fix odbc and nullable fields --- programs/odbc-bridge/ODBCSource.cpp | 13 ++++- .../integration/test_odbc_interaction/test.py | 55 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/programs/odbc-bridge/ODBCSource.cpp 
b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 06cbe70f7c6..0d0d7a0afb1 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -40,6 +40,16 @@ create_table_sql_template = """ PRIMARY KEY (`id`)) ENGINE=InnoDB; """ +create_table_sql_nullable_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` integer not null, + `col1` integer, + `col2` decimal(15,10), + `col3` varchar(32), + `col4` datetime + ) + """ + def skip_test_msan(instance): if instance.is_built_with_memory_sanitizer(): @@ -77,6 +87,11 @@ def create_mysql_db(conn, name): cursor.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name)) +def create_mysql_nullable_table(conn, table_name): + with conn.cursor() as cursor: + cursor.execute(create_table_sql_nullable_template.format(table_name)) + + def create_mysql_table(conn, table_name): with conn.cursor() as cursor: 
cursor.execute(create_table_sql_template.format(table_name)) @@ -192,6 +207,46 @@ def started_cluster(): cluster.shutdown() +def test_mysql_odbc_select_nullable(started_cluster): + skip_test_msan(node1) + mysql_setup = node1.odbc_drivers["MySQL"] + + table_name = "test_insert_nullable_select" + conn = get_mysql_conn() + create_mysql_nullable_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(1, 1, 1.23456, 'data1', '2010-01-01 00:00:00');".format( + table_name + ) + ) + cursor.execute( + "INSERT INTO clickhouse.{} VALUES(2, NULL, NULL, NULL, NULL);".format( + table_name + ) + ) + conn.commit() + + node1.query( + """ + CREATE TABLE {}(id UInt32, col1 Nullable(UInt32), col2 Nullable(Decimal(15, 10)), col3 Nullable(String), col4 Nullable(DateTime)) ENGINE = ODBC('DSN={}', 'clickhouse', '{}'); + """.format( + table_name, mysql_setup["DSN"], table_name + ) + ) + + assert ( + node1.query( + "SELECT id, col1, col2, col3, col4 from {} order by id asc".format( + table_name + ) + ) + == "1\t1\t1.23456\tdata1\t2010-01-01 00:00:00\n2\t\\N\t\\N\t\\N\t\\N\n" + ) + drop_mysql_table(conn, table_name) + conn.close() + + def test_mysql_simple_select_works(started_cluster): skip_test_msan(node1) From 0bf26dbeac3e0cb6b521bf8ec9181127594c2161 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 16 May 2024 14:54:50 +0000 Subject: [PATCH 108/273] Forbid POPULATE with Replicated databases --- src/Interpreters/InterpreterCreateQuery.cpp | 6 +++--- ...33_replicated_database_forbid_create_as_select.reference | 1 + .../02933_replicated_database_forbid_create_as_select.sh | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7272e10b801..7188bd166f4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,7 @@ BlockIO 
InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && create.select) + if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1315,11 +1315,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) is_storage_replicated = true; } - const bool allow_create_select_for_replicated = create.isView() || create.is_create_empty || !is_storage_replicated; + const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. 
Use separate CREATE and INSERT queries"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 8a6904b6bd7..df060ee2612 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -11,6 +11,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden ${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = 
ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 6dc90798c2bce90fd5a2fbf73d69575e2f4bd693 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 12:43:59 +0000 Subject: [PATCH 109/273] add setting database_replicated_allow_heavy_create --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 6 ++++-- ...933_replicated_database_forbid_create_as_select.sh | 11 +++++++++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 41878142bdc..13751b3d1a2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -732,6 +732,7 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(Bool, database_replicated_allow_heavy_create, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. 
Note that it can block DDL queue for a long time.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index fba6386b9bd..ee013907353 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"database_replicated_allow_heavy_create", true, false, "Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine."}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7188bd166f4..4e4598a2574 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1305,7 +1305,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; + if 
(!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1319,7 +1320,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_create_select_for_replicated) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Use separate CREATE and INSERT queries"); + "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index df060ee2612..831963cca8d 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -9,10 +9,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" + # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" + # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" + +# But it is allowed with the special setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') 
ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 5ddb9b11f487b3f13cb6d7e1d69e22779b6a745f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 27 Jun 2024 14:33:37 +0000 Subject: [PATCH 110/273] remove unwanted changes --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 0ee0160a969..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , connection_params(connection_params_) , blob_path(blob_path_) { @@ -128,7 +128,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getContainer(), false, false} + 
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} , connection_params(connection_params_) , blob_path(blob_path_) { From 8d0834eadeeea4b2cd36ba8ff50bdac2d7cd3b35 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 27 Jun 2024 14:52:25 +0000 Subject: [PATCH 111/273] fix --- .../02933_replicated_database_forbid_create_as_select.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 831963cca8d..15f169d880f 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -15,11 +15,11 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv (id UInt64) ENGINE = MergeTree() ORDER BY id POPULATE AS SELECT 1" # Replicated storafes are forbidden -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" 
+${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" # But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test2', '1') ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_mv2', '1') ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 0220a3cac74ad0e96244c68a00a674a41dfb47c4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 27 Jun 2024 17:38:15 +0200 Subject: [PATCH 112/273] fix tests --- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 10c21249ebc..bcb8ee94f7a 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -67,6 +67,9 @@ void 
TokenInfo::addChunkHash(String part) void TokenInfo::defineSourceWithChunkHashes() { + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + if (stage != DEFINE_SOURCE_WITH_HASHES) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); From 9fa5764c9e330a0c7b21427b5e1972b55951d850 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:14 +0200 Subject: [PATCH 113/273] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 416d4bb5f62..c3944b8dd1d 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -72,7 +72,7 @@ namespace DeduplicationToken * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken * - * After token is define it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. 
* * transition // method * DEFINED -> DEFINE_VIEW // setViewID From 1c12c95b79d24e4fba9362d140910ac6a4d16f35 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:57:24 +0200 Subject: [PATCH 114/273] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index c3944b8dd1d..9d087536a38 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -59,7 +59,7 @@ namespace DeduplicationToken size_t getTotalSize() const; /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the foloving order. + * BuildingStage ensures that token is expanded according the following order. * Firstly token is expanded with information about the source. * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. 
* From cb3d0ed2757fc6de98fdb0bad1e74f83facd7c88 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:20:35 +0200 Subject: [PATCH 115/273] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..f9f627863dd 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,6 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = true; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From b16451ad8946fdeb93ca259af083467853b6ac22 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Fri, 28 Jun 2024 02:28:07 +0200 Subject: [PATCH 116/273] Update StorageMaterializedView.cpp --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index f9f627863dd..ec1559b71a4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,7 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = true; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); 
new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); From fa7bad4993dee91009cc275fb49755aee0bf849d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 28 Jun 2024 13:51:42 +0200 Subject: [PATCH 117/273] Decrease sampling rate slightly --- src/Common/GWPAsan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index ea376609ff4..f4a916a696b 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 5000; + opts.SampleRate = 6000; const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") From 31c65a40926d3d5209898f5efb5c1cf33b602133 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 14:21:14 +0000 Subject: [PATCH 118/273] add settings to change history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4ac25a649b7..1c5ad9d0875 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,9 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, + {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, + {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to 
disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, From 898dd8bb8efd260733c8b967868e4fcb88fb145e Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 28 Jun 2024 15:15:19 +0000 Subject: [PATCH 119/273] Throw exception in bitShift for negative shift positions --- src/Functions/bitShiftLeft.cpp | 6 ++++++ src/Functions/bitShiftRight.cpp | 7 +++++++ ...ift_throws_error_for_negative_shift_positions.reference | 0 ...bit_shift_throws_error_for_negative_shift_positions.sql | 7 +++++++ 4 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c366a1ecb44..c3f5de628aa 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -24,6 +24,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -35,6 +37,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; @@ -100,6 +104,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second 
argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 1c37cd3bf4c..b53485c45f5 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -8,6 +8,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -25,6 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -51,6 +54,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; @@ -88,6 +93,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql new file mode 100644 index 00000000000..659d03d1951 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql @@ -0,0 +1,7 @@ +SELECT bitShiftRight(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftRight('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftRight(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT bitShiftLeft(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftLeft('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftLeft(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } \ No newline at end of file From 77c8f034597639439e7b9c09ea89207c73cd398e Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 28 Jun 2024 17:26:03 +0000 Subject: [PATCH 120/273] Fix coding style --- utils/check-style/check-style | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 380656cd1ca..31972894c3d 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -94,6 +94,7 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values[i] ErrorCodes::getErrorCodeByName ErrorCodes::Value + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} From d4b71ea4cbacb614b35ac6cd3fd07a0c299e3415 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 23:09:08 +0000 Subject: [PATCH 121/273] fix settings changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index c8ddf23ba08..a4883e3f209 100644 --- a/src/Core/SettingsChangesHistory.h +++ 
b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"}, }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, @@ -153,7 +154,6 @@ static const std::map Date: Sat, 29 Jun 2024 01:35:59 +0200 Subject: [PATCH 122/273] adjust logging --- src/Interpreters/Squashing.cpp | 2 +- src/Processors/Transforms/DeduplicationTokenTransforms.cpp | 2 +- src/Processors/Transforms/PlanSquashingTransform.cpp | 4 ---- src/Processors/Transforms/PlanSquashingTransform.h | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 2b808e25fbb..25434d1103e 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "base/defines.h" +#include namespace DB diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index bcb8ee94f7a..374a6495f79 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -155,7 +155,7 @@ void CheckTokenTransform::transform(Chunk & chunk) return; } - LOG_DEBUG(log, "{}, token: {}", debug, token_info->debugToken()); + LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 6a8cd10027e..ee4dfa6a64e 
100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,6 +1,4 @@ #include -#include "Common/Logger.h" -#include "Common/logger_useful.h" #include namespace DB @@ -20,8 +18,6 @@ PlanSquashingTransform::PlanSquashingTransform( void PlanSquashingTransform::consume(Chunk chunk) { - LOG_DEBUG(getLogger("PlanSquashingTransform"), "consume {}", chunk.getNumRows()); - squashed_chunk = squashing.add(std::move(chunk)); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 1f83e62284d..e6db245499e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -23,7 +23,6 @@ protected: private: Squashing squashing; Chunk squashed_chunk; - Chunk finish_chunk; }; } From 004d913c565cc0646222601d6a98789b77d92938 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sun, 30 Jun 2024 02:14:28 +0000 Subject: [PATCH 123/273] change option to enum and add test --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 4 ++++ src/Core/SettingsEnums.h | 8 ++++++++ src/Interpreters/InterpreterDeleteQuery.cpp | 6 +++--- .../0_stateless/03161_lightweight_delete_projection.sql | 7 +++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 513cdf9f9a2..574017a6953 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -612,7 +612,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(String, lightweight_mutation_projection_mode, "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. 
This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..9dfff3c56ca 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -173,6 +173,10 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, {"range", ParallelReplicasCustomKeyFilterType::RANGE}}) +IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS, + {{"throw", LightweightMutationProjectionMode::THROW}, + {"drop", LightweightMutationProjectionMode::DROP}}) + IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_ENUM(ParquetVersion, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..8456c4b688c 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -339,6 +339,14 @@ enum class ParallelReplicasCustomKeyFilterType : uint8_t DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) +enum class LightweightMutationProjectionMode : uint8_t +{ + THROW, + DROP, +}; + +DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) + DECLARE_SETTING_ENUM(LocalFSReadMethod) enum class S3QueueMode : uint8_t diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 23bbd18ff51..39d5d9e9cef 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -116,15 +116,15 @@ BlockIO InterpreterDeleteQuery::execute() if (table->hasProjection()) { auto context = Context::createCopy(getContext()); - auto mode = 
Field(context->getSettingsRef().lightweight_mutation_projection_mode); - if (mode == "throw") + auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; + if (mode == LightweightMutationProjectionMode::THROW) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DELETE query is not supported for table {} as it has projections. " "User should drop all the projections manually before running the query", table->getStorageID().getFullTableName()); } - else if (mode == "drop") + else if (mode == LightweightMutationProjectionMode::DROP) { std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 786f6a3cc34..70a069df1bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -19,6 +19,13 @@ DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users'); + SELECT * FROM users; DROP TABLE users; From 15d9ad65c65a476e8573ec37aca25050a8a8f7a4 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 09:01:32 +0200 Subject: [PATCH 124/273] Reduce even more --- src/Common/GWPAsan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index f4a916a696b..0482ddb4e2b 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 6000; + opts.SampleRate = 8000; const 
char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") From 98293b16249b21b4a69da49524c1dffce3fc5fb2 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Mon, 1 Jul 2024 07:31:57 +0000 Subject: [PATCH 125/273] Max sessions for user tests improvements --- .../test.py | 6 ++-- .../02832_alter_max_sessions_for_user.sh | 36 ++++++++++++------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 133991fed7a..a2fa77e8dc9 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -7,7 +7,7 @@ import pytest import sys import threading -from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_logs_contain_with_retry from helpers.uclient import client, prompt @@ -51,7 +51,7 @@ instance = cluster.add_instance( def get_query(name, id): - return f"SElECT '{name}', {id}, number from system.numbers" + return f"SELECT '{name}', {id}, COUNT(*) from system.numbers" def grpc_get_url(): @@ -90,7 +90,7 @@ def threaded_run_test(sessions): if len(sessions) > MAX_SESSIONS_FOR_USER: # High retry amount to avoid flakiness in ASAN (+Analyzer) tests assert_logs_contain_with_retry( - instance, "overflown session count", retry_count=60 + instance, "overflown session count", retry_count=120 ) instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh index a3b0d17f1be..87fbffdb1e6 100755 --- a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh +++ 
b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +QUERY_ID_PREFIX="02832_alter_max_sessions_query_$$" PROFILE="02832_alter_max_sessions_profile_$$" USER="02832_alter_max_sessions_user_$$" USER2="02832_alter_max_sessions_user_two_$$" @@ -15,6 +17,26 @@ ${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" ${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" +function run_sessions_set() +{ + local sessions_count="$1" + local session_check="$2" + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + local query_id="${QUERY_ID_PREFIX}_${i}" + # Write only expected error text + # More than alter_sessions_count queries will not start. 
+ ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&query_id=${query_id}&session_id=${session_id}&session_check=${session_check}&session_timeout=600&function_sleep_max_microseconds_per_block=120000000" --data-binary "SELECT sleep(120)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + for ((i = 1 ; i <= ${sessions_count} ; i++)); do + local query_id="${QUERY_ID_PREFIX}_${i}" + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id' SYNC" >/dev/null + done + + wait +} + function test_alter_profile() { local max_session_count="$1" @@ -24,23 +46,13 @@ function test_alter_profile() ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" # Create sessions with $max_session_count restriction - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # Skip output from this query - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null - done + run_sessions_set $max_session_count 0 # Update restriction to $alter_sessions_count ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" # Simultaneous sessions should use max settings from profile ($alter_sessions_count) - for ((i = 1 ; i <= ${max_session_count} ; i++)); do - local session_id="${SESSION_ID_PREFIX}_${i}" - # ignore select 1, we need only errors - ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & - done - - wait + run_sessions_set $max_session_count 1 } test_alter_profile 1 1 From 6b47171f2c2a3f3ebaed692f6d30e644c42380db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 10:52:08 +0200 Subject: [PATCH 126/273] Keeper binary with different entrypoint --- docker/packager/packager | 5 +- 
programs/CMakeLists.txt | 20 +- programs/keeper/CMakeLists.txt | 196 +------------- programs/keeper/Keeper.cpp | 10 - programs/keeper/keeper_main.cpp | 443 ++++++++++++++++++++++++++++++++ 5 files changed, 460 insertions(+), 214 deletions(-) create mode 100644 programs/keeper/keeper_main.cpp diff --git a/docker/packager/packager b/docker/packager/packager index 2dcbd8d695e..da4af7fc1be 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,10 +276,7 @@ def parse_env_variables( if is_release_build(debug_build, package_type, sanitizer, coverage): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") - if is_cross_arm: - cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") - else: - result.append("BUILD_MUSL_KEEPER=1") + cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 4640882f2be..b06290ae352 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -66,18 +66,18 @@ else() message(STATUS "Library bridge mode: OFF") endif() -if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: ON") -else() - message(STATUS "ClickHouse keeper mode: OFF") -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) message(STATUS "ClickHouse keeper-converter mode: ON") else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse keeper mode: ON") +else() + message(STATUS "ClickHouse keeper mode: OFF") +endif() + if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) message(STATUS "ClickHouse keeper-client mode: ON") else() @@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader) add_subdirectory (su) add_subdirectory (disks) -if (ENABLE_CLICKHOUSE_KEEPER) - add_subdirectory (keeper) -endif() - if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() 
@@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) add_subdirectory (keeper-client) endif() +if (ENABLE_CLICKHOUSE_KEEPER) + add_subdirectory (keeper) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 079951be55e..9b931c49c24 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,4 +1,5 @@ set(CLICKHOUSE_KEEPER_SOURCES + keeper_main.cpp Keeper.cpp ) @@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_io clickhouse_common_zookeeper daemon + clickhouse-keeper-converter-lib + clickhouse-keeper-client-lib + clickhouse_functions dbms ) @@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources - set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES}) - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp - ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp - - Keeper.cpp - clickhouse-keeper.cpp - ) - - # List of resources for clickhouse-keeper client - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) - list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp - ) - endif() - - clickhouse_add_executable(clickhouse-keeper 
${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) - - # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD) - target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) - - if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim) - target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim) - endif() - - target_link_libraries(clickhouse-keeper - PRIVATE - ch_contrib::abseil_swiss_tables - ch_contrib::nuraft - ch_contrib::lz4 - ch_contrib::zstd - ch_contrib::cityhash - ch_contrib::jemalloc - common ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - pcg_random - ch_contrib::pdqsort - ch_contrib::miniselect - clickhouse_common_config_no_zookeeper_log - loggers_no_text_log - clickhouse_common_io - clickhouse_parsers # Otherwise compression will not built. FIXME. - ) + target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK}) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..60834dbe582 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -75,16 +75,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} - -#endif - namespace DB { diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp new file mode 100644 index 00000000000..a5bc5db7be8 --- /dev/null +++ b/programs/keeper/keeper_main.cpp @@ -0,0 +1,443 @@ +#include +#include +#include + +#include +#include 
+#include +#include +#include +#include /// pair + +#include + +#include "config.h" +#include "config_tools.h" + +#include +#include +#include + +#include +#include + + +int mainEntryClickHouseKeeper(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif + +namespace +{ + +using MainFunc = int (*)(int, char**); + +/// Add an item here to register new application +std::pair clickhouse_applications[] = +{ + // keeper + {"keeper", mainEntryClickHouseKeeper}, +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"converter", mainEntryClickHouseKeeperConverter}, + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"client", mainEntryClickHouseKeeperClient}, + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif + +}; + +int printHelp(int, char **) +{ + std::cerr << "Use one of the following commands:" << std::endl; + for (auto & application : clickhouse_applications) + std::cerr << "clickhouse " << application.first << " [args] " << std::endl; + return -1; +} + + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + 
+[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. +/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. 
+void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). 
+ "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif + + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. 
+ + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + +} + +bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) +{ + /// Use app if the first arg 'app' is passed (the arg should be quietly removed) + if (argv.size() >= 2) + { + auto first_arg = argv.begin() + 1; + + /// 'clickhouse --client ...' and 'clickhouse client ...' are Ok + if (*first_arg == app_suffix + || (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix)) + { + argv.erase(first_arg); + return true; + } + } + + return false; +} + +/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. +/// We don't use it. But it can be used by some libraries for implementation of "plugins". +/// We absolutely discourage the ancient technique of loading +/// 3rd-party uncontrolled dangerous libraries into the process address space, +/// because it is insane. 
+ +#if !defined(USE_MUSL) +extern "C" +{ + void * dlopen(const char *, int) + { + return nullptr; + } + + void * dlmopen(long, const char *, int) // NOLINT + { + return nullptr; + } + + int dlclose(void *) + { + return 0; + } + + const char * dlerror() + { + return "ClickHouse does not allow dynamic library loading"; + } +} +#endif + +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + +/// This allows to implement assert to forbid initialization of a class in static constructors. +/// Usage: +/// +/// extern bool inside_main; +/// class C { C() { assert(inside_main); } }; +bool inside_main = false; + +int main(int argc_, char ** argv_) +{ + inside_main = true; + SCOPE_EXIT({ inside_main = false; }); + + /// PHDR cache is required for query profiler to work reliably + /// It also speed up exception handling, but exceptions from dynamically loaded libraries (dlopen) + /// will work only after additional call of this function. + /// Note: we forbid dlopen in our code. + updatePHDRCache(); + +#if !defined(USE_MUSL) + checkHarmfulEnvironmentVariables(argv_); +#endif + + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + + /// Reset new handler to default (that throws std::bad_alloc) + /// It is needed because LLVM library clobbers it. 
+ std::set_new_handler(nullptr); + + std::vector argv(argv_, argv_ + argc_); + + /// Print a basic help if nothing was matched + MainFunc main_func = mainEntryClickHouseKeeper; + + if (isClickhouseApp("help", argv)) + { + main_func = printHelp; + } + else + { + for (auto & application : clickhouse_applications) + { + if (isClickhouseApp(application.first, argv)) + { + main_func = application.second; + break; + } + } + } + + int exit_code = main_func(static_cast(argv.size()), argv.data()); + +#if defined(SANITIZE_COVERAGE) + dumpCoverage(); +#endif + + return exit_code; +} From 4a9daa202d74ba30fc3efd455f6a37a41bb4e4db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 11:24:45 +0200 Subject: [PATCH 127/273] Remove Keeper standalone build --- docker/packager/binary-builder/build.sh | 1 + programs/self-extracting/CMakeLists.txt | 17 +- src/Compression/CompressionFactory.cpp | 4 - src/Coordination/Standalone/Context.cpp | 486 ------------------ src/Coordination/Standalone/Context.h | 178 ------- src/Coordination/Standalone/Settings.cpp | 24 - .../Standalone/ThreadStatusExt.cpp | 19 - src/Core/SettingsFields.cpp | 45 -- src/Core/SettingsFields.h | 14 +- src/Daemon/BaseDaemon.cpp | 5 - src/Daemon/SentryWriter.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Disks/ObjectStorages/DiskObjectStorage.h | 2 - .../DiskObjectStorageMetadata.cpp | 4 - .../ObjectStorages/MetadataStorageFactory.cpp | 6 - .../ObjectStorages/ObjectStorageFactory.cpp | 18 +- .../createMetadataStorageMetrics.h | 12 +- src/Disks/registerDisks.cpp | 15 - src/IO/S3/BlobStorageLogWriter.cpp | 2 - src/Interpreters/Context.h | 8 - src/Server/PrometheusRequestHandler.cpp | 3 - src/Server/ProtocolServerAdapter.cpp | 4 +- src/Server/ProtocolServerAdapter.h | 2 +- 23 files changed, 36 insertions(+), 837 deletions(-) delete mode 100644 src/Coordination/Standalone/Context.cpp delete mode 100644 src/Coordination/Standalone/Context.h delete mode 100644 
src/Coordination/Standalone/Settings.cpp delete mode 100644 src/Coordination/Standalone/ThreadStatusExt.cpp diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..bd5f2fe8466 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -111,6 +111,7 @@ fi mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output +[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 4b6dd07f618..32b686d40dd 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -10,9 +10,24 @@ else () set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () -add_custom_target (self-extracting ALL +add_custom_target (self-extracting-server ALL ${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped DEPENDS clickhouse clickhouse-stripped compressor ) + +set(self_extracting_deps "self-extracting-server") + +if (BUILD_STANDALONE_KEEPER) + add_custom_target (self-extracting-keeper ALL + ${CMAKE_COMMAND} -E remove clickhouse-keeper + COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper + DEPENDS compressor clickhouse-keeper + ) + list(APPEND self_extracting_deps 
"self-extracting-keeper") +endif() + +add_custom_target (self-extracting ALL + DEPENDS ${self_extracting_deps} +) diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 68e0131c91b..2e7aa0d086f 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -185,7 +185,6 @@ void registerCodecDeflateQpl(CompressionCodecFactory & factory); /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerCodecDelta(CompressionCodecFactory & factory); void registerCodecT64(CompressionCodecFactory & factory); void registerCodecDoubleDelta(CompressionCodecFactory & factory); @@ -193,7 +192,6 @@ void registerCodecGorilla(CompressionCodecFactory & factory); void registerCodecEncrypted(CompressionCodecFactory & factory); void registerCodecFPC(CompressionCodecFactory & factory); void registerCodecGCD(CompressionCodecFactory & factory); -#endif CompressionCodecFactory::CompressionCodecFactory() { @@ -205,7 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() #endif registerCodecLZ4HC(*this); registerCodecMultiple(*this); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerCodecDelta(*this); registerCodecT64(*this); registerCodecDoubleDelta(*this); @@ -216,7 +213,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); -#endif default_codec = get("LZ4", {}); } diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp deleted file mode 100644 index 2017adcc58d..00000000000 --- a/src/Coordination/Standalone/Context.cpp +++ /dev/null @@ -1,486 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace ProfileEvents -{ - extern const Event ContextLock; - extern const Event ContextLockWaitMicroseconds; -} - 
-namespace CurrentMetrics -{ - extern const Metric ContextLockWait; - extern const Metric BackgroundSchedulePoolTask; - extern const Metric BackgroundSchedulePoolSize; - extern const Metric IOWriterThreads; - extern const Metric IOWriterThreadsActive; - extern const Metric IOWriterThreadsScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; -} - -struct ContextSharedPart : boost::noncopyable -{ - ContextSharedPart() - : macros(std::make_unique()) - {} - - ~ContextSharedPart() - { - if (keeper_dispatcher) - { - try - { - keeper_dispatcher->shutdown(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - /// Wait for thread pool for background reads and writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - if (asynchronous_remote_fs_reader) - { - try - { - asynchronous_remote_fs_reader->wait(); - asynchronous_remote_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (asynchronous_local_fs_reader) - { - try - { - asynchronous_local_fs_reader->wait(); - asynchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (synchronous_local_fs_reader) - { - try - { - synchronous_local_fs_reader->wait(); - synchronous_local_fs_reader.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (threadpool_writer) - { - try - { - threadpool_writer->wait(); - threadpool_writer.reset(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - } - - /// For access of most of shared objects. - mutable SharedMutex mutex; - - ServerSettings server_settings; - - String path; /// Path to the data directory, with a slash at the end. - ConfigurationPtr config; /// Global configuration settings. - MultiVersion macros; /// Substitutions extracted from config. 
- OnceFlag schedule_pool_initialized; - mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - - mutable OnceFlag readers_initialized; - mutable std::unique_ptr asynchronous_remote_fs_reader; - mutable std::unique_ptr asynchronous_local_fs_reader; - mutable std::unique_ptr synchronous_local_fs_reader; - -#if USE_LIBURING - mutable OnceFlag io_uring_reader_initialized; - mutable std::unique_ptr io_uring_reader; -#endif - - mutable OnceFlag threadpool_writer_initialized; - mutable std::unique_ptr threadpool_writer; - - mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads - mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes - - mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads - mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage - - mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); -}; - -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; - -Context::Context() = default; -Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} -Context::~Context() = default; - -SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default; -SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default; -SharedContextHolder::SharedContextHolder() = default; -SharedContextHolder::~SharedContextHolder() = default; -SharedContextHolder::SharedContextHolder(std::unique_ptr shared_context) - : shared(std::move(shared_context)) {} - -void SharedContextHolder::reset() { shared.reset(); } - -void 
Context::makeGlobalContext() -{ - initGlobal(); - global_context = shared_from_this(); -} - -ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) -{ - auto res = std::shared_ptr(new Context); - res->shared = shared_part; - return res; -} - -void Context::initGlobal() -{ - assert(!global_context_instance); - global_context_instance = shared_from_this(); -} - -SharedContextHolder Context::createShared() -{ - return SharedContextHolder(std::make_unique()); -} - - -ContextMutablePtr Context::getGlobalContext() const -{ - auto ptr = global_context.lock(); - if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); - return ptr; -} - -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - 
Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -String Context::getPath() const -{ - auto lock = getGlobalSharedLock(); - return shared->path; -} - -void Context::setPath(const String & path) -{ - auto lock = getGlobalLock(); - shared->path = path; -} - -MultiVersion::Version Context::getMacros() const -{ - return shared->macros.get(); -} - -void Context::setMacros(std::unique_ptr && macros) -{ - shared->macros.set(std::move(macros)); -} - -BackgroundSchedulePool & Context::getSchedulePool() const -{ - callOnce(shared->schedule_pool_initialized, [&] { - shared->schedule_pool = std::make_unique( - shared->server_settings.background_schedule_pool_size, - CurrentMetrics::BackgroundSchedulePoolTask, - CurrentMetrics::BackgroundSchedulePoolSize, - "BgSchPool"); - }); - - return *shared->schedule_pool; -} - -void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) -{ - shared->remote_host_filter.setValuesFromConfig(config); -} - -const RemoteHostFilter & Context::getRemoteHostFilter() const -{ - return shared->remote_host_filter; -} - -IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const -{ - callOnce(shared->readers_initialized, [&] { - const auto & config = getConfigRef(); - shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config); - shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config); - shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config); - }); - - switch (type) - { - case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: - return *shared->asynchronous_remote_fs_reader; - case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: - return *shared->asynchronous_local_fs_reader; - 
case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: - return *shared->synchronous_local_fs_reader; - } -} - -#if USE_LIBURING -IOUringReader & Context::getIOUringReader() const -{ - callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader(); - }); - - return *shared->io_uring_reader; -} -#endif - -std::shared_ptr Context::getFilesystemCacheLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getFilesystemReadPrefetchesLog() const -{ - return nullptr; -} - -std::shared_ptr Context::getBlobStorageLog() const -{ - return nullptr; -} - -void Context::setConfig(const ConfigurationPtr & config) -{ - auto lock = getGlobalLock(); - shared->config = config; -} - -const Poco::Util::AbstractConfiguration & Context::getConfigRef() const -{ - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); -} - -std::shared_ptr Context::getAsyncReadCounters() const -{ - auto lock = getLocalLock(); - if (!async_read_counters) - async_read_counters = std::make_shared(); - return async_read_counters; -} - -ThreadPool & Context::getThreadPoolWriter() const -{ - callOnce(shared->threadpool_writer_initialized, [&] { - const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); - auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); - - shared->threadpool_writer = std::make_unique( - CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, CurrentMetrics::IOWriterThreadsScheduled, pool_size, pool_size, queue_size); - }); - - return *shared->threadpool_writer; -} - -ThrottlerPtr Context::getRemoteReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getRemoteWriteThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalReadThrottler() const -{ - return nullptr; -} - -ThrottlerPtr Context::getLocalWriteThrottler() const -{ - return nullptr; -} - 
-ReadSettings Context::getReadSettings() const -{ - return ReadSettings{}; -} - -ResourceManagerPtr Context::getResourceManager() const -{ - return nullptr; -} - -ClassifierPtr Context::getWorkloadClassifier() const -{ - return nullptr; -} - -void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const -{ - const auto & config_ref = getConfigRef(); - - std::lock_guard lock(shared->keeper_dispatcher_mutex); - - if (shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); - - if (config_ref.has("keeper_server")) - { - shared->keeper_dispatcher = std::make_shared(); - shared->keeper_dispatcher->initialize(config_ref, true, start_async, getMacros()); - } -} - -std::shared_ptr Context::getKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); - - return shared->keeper_dispatcher; -} - -std::shared_ptr Context::tryGetKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - return shared->keeper_dispatcher; -} - -void Context::shutdownKeeperDispatcher() const -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (shared->keeper_dispatcher) - { - shared->keeper_dispatcher->shutdown(); - shared->keeper_dispatcher.reset(); - } -} - -void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(shared->keeper_dispatcher_mutex); - if (!shared->keeper_dispatcher) - return; - - shared->keeper_dispatcher->updateConfiguration(config_, getMacros()); -} - -std::shared_ptr Context::getZooKeeper() const -{ - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); -} - -const S3SettingsByEndpoint & Context::getStorageS3Settings() const -{ - std::lock_guard lock(shared->mutex); - - if 
(!shared->storage_s3_settings) - { - const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); - } - - return *shared->storage_s3_settings; -} - -const ServerSettings & Context::getServerSettings() const -{ - return shared->server_settings; -} - -bool Context::hasTraceCollector() const -{ - return false; -} - -bool Context::isBackgroundOperationContext() const -{ - return false; -} - -} diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h deleted file mode 100644 index d3bbfececed..00000000000 --- a/src/Coordination/Standalone/Context.h +++ /dev/null @@ -1,178 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include - -#include "config.h" -namespace zkutil -{ - class ZooKeeper; - using ZooKeeperPtr = std::shared_ptr; -} - -namespace DB -{ - -struct ContextSharedPart; -class Macros; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class BlobStorageLog; -class IOUringReader; -class S3SettingsByEndpoint; - -/// A small class which owns ContextShared. -/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. -struct SharedContextHolder -{ - ~SharedContextHolder(); - SharedContextHolder(); - explicit SharedContextHolder(std::unique_ptr shared_context); - SharedContextHolder(SharedContextHolder &&) noexcept; - - SharedContextHolder & operator=(SharedContextHolder &&) noexcept; - - ContextSharedPart * get() const { return shared.get(); } - void reset(); -private: - std::unique_ptr shared; -}; - -class ContextData -{ -protected: - ContextWeakMutablePtr global_context; - inline static ContextPtr global_context_instance; - ContextSharedPart * shared; - - /// Query metrics for reading data asynchronously with IAsynchronousReader. 
- mutable std::shared_ptr async_read_counters; - - Settings settings; /// Setting for query execution. - -public: - /// Use copy constructor or createGlobal() instead - ContextData(); - ContextData(const ContextData &); -}; - -class Context : public ContextData, public std::enable_shared_from_this -{ -private: - /// ContextData mutex - mutable SharedMutex mutex; - - Context(); - Context(const Context &); - - std::unique_lock getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - -public: - /// Create initial Context with ContextShared and etc. - static ContextMutablePtr createGlobal(ContextSharedPart * shared_part); - static SharedContextHolder createShared(); - - ContextMutablePtr getGlobalContext() const; - static ContextPtr getGlobalContextInstance() { return global_context_instance; } - - void makeGlobalContext(); - void initGlobal(); - - ~Context(); - - using ConfigurationPtr = Poco::AutoPtr; - - /// Global application configuration settings. 
- void setConfig(const ConfigurationPtr & config); - const Poco::Util::AbstractConfiguration & getConfigRef() const; - - const Settings & getSettingsRef() const { return settings; } - - String getPath() const; - void setPath(const String & path); - - MultiVersion::Version getMacros() const; - void setMacros(std::unique_ptr && macros); - - BackgroundSchedulePool & getSchedulePool() const; - - /// Storage of allowed hosts from config.xml - void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - const RemoteHostFilter & getRemoteHostFilter() const; - - std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getFilesystemReadPrefetchesLog() const; - std::shared_ptr getBlobStorageLog() const; - - enum class ApplicationType : uint8_t - { - KEEPER, - SERVER, - }; - - void setApplicationType(ApplicationType) {} - ApplicationType getApplicationType() const { return ApplicationType::KEEPER; } - - IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; -#if USE_LIBURING - IOUringReader & getIOUringReader() const; -#endif - std::shared_ptr getAsyncReadCounters() const; - ThreadPool & getThreadPoolWriter() const; - - ThrottlerPtr getRemoteReadThrottler() const; - ThrottlerPtr getRemoteWriteThrottler() const; - - ThrottlerPtr getLocalReadThrottler() const; - ThrottlerPtr getLocalWriteThrottler() const; - - ReadSettings getReadSettings() const; - - /// Resource management related - ResourceManagerPtr getResourceManager() const; - ClassifierPtr getWorkloadClassifier() const; - - std::shared_ptr getKeeperDispatcher() const; - std::shared_ptr tryGetKeeperDispatcher() const; - void initializeKeeperDispatcher(bool start_async) const; - void shutdownKeeperDispatcher() const; - void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); - - zkutil::ZooKeeperPtr getZooKeeper() const; - - const S3SettingsByEndpoint & getStorageS3Settings() const; - - const String & getUserName() const { static std::string user; 
return user; } - - const ServerSettings & getServerSettings() const; - - bool hasTraceCollector() const; - - bool isBackgroundOperationContext() const; -}; - -} diff --git a/src/Coordination/Standalone/Settings.cpp b/src/Coordination/Standalone/Settings.cpp deleted file mode 100644 index 12a7a42ffac..00000000000 --- a/src/Coordination/Standalone/Settings.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -namespace DB -{ - -IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) - -std::vector Settings::getAllRegisteredNames() const -{ - std::vector all_settings; - for (const auto & setting_field : all()) - { - all_settings.push_back(setting_field.getName()); - } - return all_settings; -} - -void Settings::set(std::string_view name, const Field & value) -{ - BaseSettings::set(name, value); -} - - -} diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp deleted file mode 100644 index fc78233d9dc..00000000000 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -namespace DB -{ - -void CurrentThread::detachFromGroupIfNotDetached() -{ -} - -void CurrentThread::attachToGroup(const ThreadGroupPtr &) -{ -} - -void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) -{ -} - -} diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index caa8b3fdffd..7d094e2a107 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -380,15 +380,6 @@ void SettingFieldString::readBinary(ReadBuffer & in) *this = std::move(str); } -/// Unbeautiful workaround for clickhouse-keeper standalone build ("-DBUILD_STANDALONE_KEEPER=1"). -/// In this build, we don't build and link library dbms (to which SettingsField.cpp belongs) but -/// only build SettingsField.cpp. Further dependencies, e.g. 
DataTypeString and DataTypeMap below, -/// require building of further files for clickhouse-keeper. To keep dependencies slim, we don't do -/// that. The linker does not complain only because clickhouse-keeper does not call any of below -/// functions. A cleaner alternative would be more modular libraries, e.g. one for data types, which -/// could then be linked by the server and the linker. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} String SettingFieldMap::toString() const @@ -428,42 +419,6 @@ void SettingFieldMap::readBinary(ReadBuffer & in) *this = map; } -#else - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SettingFieldMap::SettingFieldMap(const Field &) : value(Map()) {} -String SettingFieldMap::toString() const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - - -SettingFieldMap & SettingFieldMap::operator =(const Field &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::parseFromString(const String &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::writeBinary(WriteBuffer &) const -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -void SettingFieldMap::readBinary(ReadBuffer &) -{ - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Setting of type Map not supported"); -} - -#endif - namespace { char stringToChar(const String & str) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 19809348921..266141815e3 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -247,12 +247,6 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD -#define NORETURN [[noreturn]] -#else -#define NORETURN -#endif - struct SettingFieldMap { public: 
@@ -269,11 +263,11 @@ public: operator const Map &() const { return value; } /// NOLINT explicit operator Field() const { return value; } - NORETURN String toString() const; - NORETURN void parseFromString(const String & str); + String toString() const; + void parseFromString(const String & str); - NORETURN void writeBinary(WriteBuffer & out) const; - NORETURN void readBinary(ReadBuffer & in); + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); }; #undef NORETURN diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b2c425ceb79..48f76769a09 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -502,9 +502,7 @@ private: if (collectCrashLog) collectCrashLog(sig, thread_num, query_id, stack_trace); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD Context::getGlobalContextInstance()->handleCrash(); -#endif /// Send crash report to developers (if configured) if (sig != SanitizerTrap) @@ -533,8 +531,6 @@ private: } } - /// ClickHouse Keeper does not link to some parts of Settings. -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) { @@ -549,7 +545,6 @@ private: LOG_FATAL(log, "Changed settings: {}", changed_settings); } } -#endif /// When everything is done, we will try to send these error messages to the client. 
if (thread_ptr) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 9479dd65730..c51a1100639 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -19,7 +19,7 @@ #include "config.h" #include -#if USE_SENTRY && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_SENTRY # include # include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index c77709c27eb..bb9761a3905 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -78,7 +78,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c std::unique_ptr buf; -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD if (with_file_cache) { auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); @@ -96,7 +95,6 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c /* read_until_position */std::nullopt, cache_log); } -#endif /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the /// former doesn't support seeks. diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 59cc82d8c81..5c45a258806 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -195,7 +195,6 @@ public: /// DiskObjectStorage(CachedObjectStorage(CachedObjectStorage(S3ObjectStorage))) String getStructure() const { return fmt::format("DiskObjectStorage-{}({})", getName(), object_storage->getName()); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// Add a cache layer. /// Example: DiskObjectStorage(S3ObjectStorage) -> DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) /// There can be any number of cache layers: @@ -204,7 +203,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. 
NameSet getCacheLayersNames() const override; -#endif bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 44854633d65..56d5d11ef8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -222,11 +222,7 @@ ObjectKeyWithMetadata DiskObjectStorageMetadata::popLastObject() bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD return Context::getGlobalContextInstance()->getServerSettings().storage_metadata_write_full_object_key; -#else - return false; -#endif } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index ab7c2069b43..a690ecd2757 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -2,9 +2,7 @@ #include #include #include -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include -#endif #include #include @@ -135,7 +133,6 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) }); } -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { factory.registerMetadataStorageType("web", []( @@ -147,7 +144,6 @@ void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & fa return std::make_shared(assert_cast(*object_storage)); }); } -#endif void registerMetadataStorages() { @@ -155,9 +151,7 @@ void registerMetadataStorages() registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); registerPlainRewritableMetadataStorage(factory); -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); -#endif } } diff --git 
a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1bf8250adff..5698d2ad588 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,19 +7,17 @@ #include #include #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS #include #include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE #include #include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include #include -#endif #include #include #include @@ -284,7 +282,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS void registerHDFSObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType( @@ -309,7 +307,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) } #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE void registerAzureObjectStorage(ObjectStorageFactory & factory) { auto creator = []( @@ -333,7 +331,6 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) } #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerWebObjectStorage(ObjectStorageFactory & factory) { factory.registerObjectStorageType("web", []( @@ -381,7 +378,6 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) factory.registerObjectStorageType("local_blob_storage", creator); factory.registerObjectStorageType("local", creator); } -#endif void registerObjectStorages() { @@ -393,18 +389,16 @@ void registerObjectStorages() registerS3PlainRewritableObjectStorage(factory); #endif -#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_HDFS registerHDFSObjectStorage(factory); #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if 
USE_AZURE_BLOB_STORAGE registerAzureObjectStorage(factory); #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerWebObjectStorage(factory); registerLocalObjectStorage(factory); -#endif } } diff --git a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h index 6dddc227ade..5cf1fbef2ab 100644 --- a/src/Disks/ObjectStorages/createMetadataStorageMetrics.h +++ b/src/Disks/ObjectStorages/createMetadataStorageMetrics.h @@ -1,14 +1,14 @@ #pragma once +#include "config.h" + #if USE_AWS_S3 # include #endif -#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_AZURE_BLOB_STORAGE # include #endif -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD -# include -#endif +#include #include namespace ProfileEvents @@ -42,7 +42,7 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -53,7 +53,6 @@ inline MetadataStorageMetrics MetadataStorageMetrics::create inline MetadataStorageMetrics MetadataStorageMetrics::create() { @@ -62,6 +61,5 @@ inline MetadataStorageMetrics MetadataStorageMetrics::creategetBlobStorageLog()) { auto log_writer = std::make_shared(std::move(blob_storage_log)); @@ -67,7 +66,6 @@ BlobStorageLogWriterPtr BlobStorageLogWriter::create(const String & disk_name) return log_writer; } -#endif return {}; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f9b91a45978..d3f152b7a67 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1,7 +1,5 @@ #pragma once -#ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD - #include #include #include @@ -1451,9 +1449,3 @@ struct HTTPContext : public IHTTPContext }; } - -#else - -#include - -#endif diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..1f3e038a1f5 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,9 
+18,6 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone. const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 8d14a849894..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index dd11c1dfc58..76a6776ed9c 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -23,7 +23,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) +#if USE_GRPC ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif From 0b24a416b5f50bb4416d4d95dddd20a1e333b569 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 10:31:29 +0000 Subject: [PATCH 128/273] Also throw error if bit shift positions is greater than the bit width of value --- 
src/Functions/bitShiftLeft.cpp | 19 ++++++++++------- src/Functions/bitShiftRight.cpp | 19 +++++++++-------- ...ror_for_negative_shift_positions.reference | 0 ...ows_error_for_negative_shift_positions.sql | 7 ------- ...t_throws_error_for_out_of_bounds.reference | 1 + ...t_shift_throws_error_for_out_of_bounds.sql | 21 +++++++++++++++++++ 6 files changed, 43 insertions(+), 24 deletions(-) delete mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference delete mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference create mode 100644 tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c3f5de628aa..9d32e5b5ca4 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -7,6 +7,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int ARGUMENT_OUT_OF_BOUND; } namespace @@ -24,8 +25,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); + else if (b < 0 || b > B(8 * sizeof(A))) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -37,13 +38,15 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; + size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.push_back(0); @@ -104,14 +107,14 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index b53485c45f5..13b210a4f63 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -8,7 +8,7 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; } namespace @@ -26,8 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); + else if (b < 0 || b > B(8 * sizeof(A))) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -54,13 +54,14 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; + size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { /// insert default value out_vec.push_back(0); @@ -93,14 +94,14 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The number of shift positions needs to be a positive value"); else { UInt8 word_size = 8; size_t n = end - pos; + if (b < 0 || b > B(word_size * n)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + if (static_cast(b) >= (static_cast(n) * word_size)) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff 
--git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql deleted file mode 100644 index 659d03d1951..00000000000 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_negative_shift_positions.sql +++ /dev/null @@ -1,7 +0,0 @@ -SELECT bitShiftRight(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftRight('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftRight(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } - -SELECT bitShiftLeft(1, -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftLeft('hola', -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT bitShiftLeft(toFixedString('hola', 10), -1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql new file mode 100644 index 00000000000..9cfc6f00b91 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -0,0 +1,21 @@ +SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; +SELECT 
bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; +SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; +SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; +SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT 'OK'; \ No newline at end of file From 0fed338ac26bb7a974fe8cfe8ac1b6dacbc3f4df Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 11:03:25 +0000 Subject: [PATCH 129/273] Fix other bit shift tests after out of bounds check --- ...t_shift_right_for_string_integer.reference | 39 ------------------- ...016_bit_shift_right_for_string_integer.sql | 19 ++++----- ...it_shift_left_for_string_integer.reference | 39 ------------------- ...2017_bit_shift_left_for_string_integer.sql | 19 ++++----- ...t_shift_throws_error_for_out_of_bounds.sql | 6 --- 5 files changed, 16 insertions(+), 106 deletions(-) diff --git 
a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference index e6a2b2b6aaf..ab832478da0 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00000001 39 Hello 00000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000001 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0010010000110010 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 
00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 8 Hel 0100100001100101 8 Hel 0100100001100101 8 Hel 0100100001100101 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 
00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hello 0000000010010000110010101101100011011000 8 Hello 01001000011001010110110001101100 9 Hello 00100100001100101011011000110110 --1 Hel 0 Hel 010010000110010101101100 1 Hel 
001001000011001010110110 7 Hel 000000001001000011001010 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 00100100 23 Hel 00000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 9 Hello\0\0\0\0\0 00000000001001000011001010110110001101100011011110000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 00100100001100101011011000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 
Hel\0\0\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql index 0ee04e408ba..40fccbc89e6 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftRight('Hello', 37)); SELECT 38,'Hello',bin(bitShiftRight('Hello', 38)); SELECT 39,'Hello',bin(bitShiftRight('Hello', 39)); SELECT 40,'Hello',bin(bitShiftRight('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftRight('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftRight('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftRight(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 1 SELECT 78,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 80)); -SELECT 
81,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_right_string_integer; CREATE TABLE test_bit_shift_right_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); +SELECT bin(bitShiftRight('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) 
as fixed_string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftRight(str, 7)) as string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftRight(str, 8)) as string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftRight('Hello', id)) as string_res FROM test_bit_shift_right_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftRight(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftRight(toFixedString('Hel', 10), id)) as fixed_string_res FROM 
test_bit_shift_right_string_integer; diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference index ff5a09c0d48..a20c44bbb9a 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00010010000110010101101100011011000110111100000000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0000000010010000110010101101100000000000 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 
Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 
010010000110010101101100011011000110111100000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 
01100101011011000000000000000000000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hello 001001000011001010110110001101100011011110000000 8 Hello 010010000110010101101100011011000110111100000000 9 Hello 00000000100100001100101011011000110110001101111000000000 --1 Hel 0 Hel 010010000110010101101100 1 Hel 00000000100100001100101011011000 7 Hel 00100100001100101011011000000000 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 000000001001000011001010110110000000000000000000 23 Hel 001001000011001010110110000000000000000000000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 9 Hello\0\0\0\0\0 11001010110110001101100011011110000000000000000000000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 10010000110010101101100000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql index 5c7a9901dae..a8e66eda281 100644 --- 
a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftLeft('Hello', 37)); SELECT 38,'Hello',bin(bitShiftLeft('Hello', 38)); SELECT 39,'Hello',bin(bitShiftLeft('Hello', 39)); SELECT 40,'Hello',bin(bitShiftLeft('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftLeft('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftLeft('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftLeft(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10 SELECT 78,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_left_string_integer; CREATE TABLE test_bit_shift_left_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_left_string_integer 
VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); +SELECT bin(bitShiftLeft('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 7)) as string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 8)) as string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 7)) as fixed_string_res FROM 
test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftLeft('Hello', id)) as string_res FROM test_bit_shift_left_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftLeft(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftLeft(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql index 9cfc6f00b91..a1a246593d8 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -1,21 +1,15 @@ SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; SELECT bitShiftRight('hola', 4 
* 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toUInt8(1), number) FROM numbers(8 + 1) FORMAT Null; SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft('hola', number) FROM numbers(4 * 8 + 1) FORMAT Null; SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toFixedString('hola', 8), number) FROM numbers(8 * 8 + 1) FORMAT Null; SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT 'OK'; \ No newline at end of file From d0506f0214e949426c41fea2d9ebd79813422fd8 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 12:23:54 +0000 Subject: [PATCH 130/273] Fix more tests One of tests actually uncovered a casting error :) --- src/Functions/bitShiftLeft.cpp | 2 +- src/Functions/bitShiftRight.cpp | 2 +- tests/queries/0_stateless/02366_kql_func_binary.reference | 3 --- tests/queries/0_stateless/02366_kql_func_binary.sql | 3 --- .../0_stateless/02766_bitshift_with_const_arguments.sql | 2 +- utils/check-style/check-style | 1 - 6 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 9d32e5b5ca4..3d496296ba9 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -25,7 +25,7 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, 
"BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0 || b > B(8 * sizeof(A))) + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 13b210a4f63..0b41493fc6d 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -26,7 +26,7 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0 || b > B(8 * sizeof(A))) + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference index 6276cd6d867..360c1aa9899 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -1,7 +1,4 @@ -- binary functions 4 7 -1 -1 -1 7 3 1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql index 824022b564c..687f3afb5ee 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -1,8 +1,5 @@ set dialect='kusto'; print ' -- binary functions'; print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print 
binary_shift_right(binary_shift_left(1, 65), 65) == 1; print binary_xor(2, 5), bitset_count_ones(42); print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 6b2961f0555..91e8624057c 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } DROP TABLE t0; DROP TABLE t1; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 31972894c3d..380656cd1ca 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -94,7 +94,6 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values[i] ErrorCodes::getErrorCodeByName ErrorCodes::Value - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} From b0bbc9c8104ae36750f8216a1e197331f5c7d8ab Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 1 Jul 2024 16:00:32 +0200 Subject: [PATCH 131/273] Fix symlinks --- programs/keeper/clickhouse-keeper.cpp | 30 --------------------------- programs/keeper/keeper_main.cpp | 8 ++++++- 2 files changed, 7 insertions(+), 31 deletions(-) delete mode 100644 programs/keeper/clickhouse-keeper.cpp diff --git a/programs/keeper/clickhouse-keeper.cpp 
b/programs/keeper/clickhouse-keeper.cpp deleted file mode 100644 index f2f91930ac0..00000000000 --- a/programs/keeper/clickhouse-keeper.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include "config_tools.h" - - -int mainEntryClickHouseKeeper(int argc, char ** argv); - -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT -int mainEntryClickHouseKeeperClient(int argc, char ** argv); -#endif - -int main(int argc_, char ** argv_) -{ -#if ENABLE_CLICKHOUSE_KEEPER_CLIENT - - if (argc_ >= 2) - { - /// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK - if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0) - { - argv_[1] = argv_[0]; - return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1); - } - } - - if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client"))) - return mainEntryClickHouseKeeperClient(argc_, argv_); -#endif - - return mainEntryClickHouseKeeper(argc_, argv_); -} diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index a5bc5db7be8..ec9b84ce94b 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -339,7 +339,13 @@ bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) } } - return false; + /// keeper suffix is default which will be used if no other app is detected + if (app_suffix == "keeper") + return false; + + /// Use app if clickhouse binary is run through symbolic link with name clickhouse-app + std::string app_name = "clickhouse-" + std::string(app_suffix); + return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); } /// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure. 
From 12608d2090485e5cd98f82b78c64f7014e7e391c Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 1 Jul 2024 14:22:32 +0000 Subject: [PATCH 132/273] Improve exception text --- src/Functions/bitShiftLeft.cpp | 6 +++--- src/Functions/bitShiftRight.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 3d496296ba9..645672c50e2 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -26,7 +26,7 @@ struct BitShiftLeftImpl if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -43,7 +43,7 @@ struct BitShiftLeftImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) @@ -112,7 +112,7 @@ struct BitShiftLeftImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to 
shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 0b41493fc6d..2e9182d3fe6 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -27,7 +27,7 @@ struct BitShiftRightImpl if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -59,7 +59,7 @@ struct BitShiftRightImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { @@ -99,7 +99,7 @@ struct BitShiftRightImpl UInt8 word_size = 8; size_t n = end - pos; if (b < 0 || b > B(word_size * n)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a positive value and not greater than the bit width of the value to shift"); + throw 
Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); /// To prevent overflow if (static_cast(b) >= (static_cast(n) * word_size)) { From f596f0f66aa571afe3d762d938a812f52a8a9766 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 30 Jun 2024 23:59:08 +0200 Subject: [PATCH 133/273] add restriction for storage join --- src/Storages/StorageJoin.cpp | 5 ++- ...join_strictness_type_restriction.reference | 0 ...orage_join_strictness_type_restriction.sql | 42 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference create mode 100644 tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index d12e5b1a20b..eb58b9ec3f8 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -395,11 +395,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, " SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..1c52f79db11 --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } From cc37cbdd176867ab444f22d58a2feb5297ef952c Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 1 Jul 2024 17:03:27 +0200 Subject: [PATCH 134/273] refine tests --- .../03197_storage_join_strictness_type_restriction.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql index 1c52f79db11..5aa3e4c2e0c 100644 --- a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -15,7 +15,7 @@ CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(SEMI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( @@ -33,10 +33,10 @@ CREATE 
TABLE t1 ( a Int64, b Int64 -) Engine = Join(ANTI, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } CREATE TABLE t1 ( a Int64, b Int64 -) Engine = Join(ANY, OUTER, a); -- { serverError BAD_ARGUMENTS } +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } From adcaf117a1b2987aa47c08d06e5db0c177a191b8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 1 Jul 2024 16:38:39 +0100 Subject: [PATCH 135/273] impl --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeSource.cpp | 9 ++++----- src/Storages/MergeTree/MergeTreeSource.h | 3 ++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0dacdc0b958..b35a2e6f220 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -382,7 +382,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -481,7 +481,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -593,7 +593,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (set_rows_approx) source->addTotalRowsApprox(total_rows); diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp 
b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...) @@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; return processReadResult(processor->read()); } diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 655f0ee6ebe..fc39b4f9b09 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -12,7 +12,7 @@ struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_); ~MergeTreeSource() override; std::string getName() const override; @@ -30,6 +30,7 @@ protected: private: MergeTreeSelectProcessorPtr processor; + const std::string log_name; #if defined(OS_LINUX) struct AsyncReadingState; From d4312fe54b508c2f69fff15d724a16881e235f93 Mon Sep 17 00:00:00 
2001 From: kssenii Date: Mon, 1 Jul 2024 21:55:45 +0200 Subject: [PATCH 136/273] Fix --- src/Interpreters/Cache/WriteBufferToFileSegment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e654d091561..dd038948adf 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -134,7 +134,7 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() if (file_segment->getDownloadedSize() > 0) return std::make_unique(file_segment->getPath()); else - return std::make_unique(); + return std::make_unique(); } } From 27e0e57054010446a530efc3eb02e85d09f7e9e2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jul 2024 22:47:36 +0200 Subject: [PATCH 137/273] Use ReadBufferFromFileBase instead of ReadBufferFromFile for reread_buffer_from_file --- src/Storages/MergeTree/MergeTask.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..c8f1a08128b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -555,18 +555,18 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", 
demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); /// CompressedReadBufferFromFile expects std::unique_ptr as argument. ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); From 2991e27183a7dfe0e60d944155759f15123d96a3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Jun 2024 15:26:46 +0200 Subject: [PATCH 138/273] Parse user from URL for dashboard.html (useful for sharing) Signed-off-by: Azat Khuzhin --- programs/server/dashboard.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index b21d4b86314..45f988f7b1e 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -506,6 +506,14 @@ let user = 'default'; let password = ''; let add_http_cors_header = (location.protocol != 'file:'); +const current_url = new URL(window.location); +/// Substitute user name if it's specified in the query string +const user_from_url = current_url.searchParams.get('user'); +if (user_from_url) { + user = user_from_url; +} + + const errorCodeMessageMap = { 516: 'Error authenticating with database. Please check your connection params and try again.' 
} From aaffa64cdd34da31009c206a30307e7b5db91155 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Jul 2024 10:30:45 +0200 Subject: [PATCH 139/273] Fix data race for Keeper snapshot queue --- src/Common/ConcurrentBoundedQueue.h | 8 +------- src/Coordination/KeeperDispatcher.cpp | 10 ++-------- src/Coordination/KeeperStateMachine.cpp | 8 +++++--- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4389da082d..38893242a2b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -319,19 +319,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if (!snapshot_file_info) continue; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e4d661dfe17..df152bbe0af 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ 
b/src/Coordination/KeeperStateMachine.cpp @@ -569,7 +569,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; if (!execute_only_cleanup) { try @@ -599,7 +599,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -612,13 +613,14 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res latest_snapshot_info->path); } } + + ret = true; } catch (...) 
{ ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { From ee0c4093d461233cfb920a4a224292ea9529393b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 12:03:27 +0200 Subject: [PATCH 140/273] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..7d6499cef5e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -253,7 +253,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From 422b8dea317168ddc4aa4c0e14e0d0350ce0be81 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 2 Jul 2024 12:30:22 +0200 Subject: [PATCH 141/273] Add database_replicated_allow_heavy_create to settings changes --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..70f94fe2ab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list Date: Tue, 2 Jul 2024 10:51:58 +0000 Subject: [PATCH 142/273] Fix support of non-const scale arguments in power function --- src/Functions/FunctionsRound.h | 171 ++++++++++++++---- 
.../03165_round_scale_as_column.reference | 13 ++ .../03165_round_scale_as_column.sql | 3 +- 3 files changed, 152 insertions(+), 35 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..d43f7f264b4 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -518,39 +518,105 @@ struct Dispatcher template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - auto col_res = ColumnVector::create(); - - typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(value_col_typed.getData().size()); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + auto col_res = ColumnVector::create(); + + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(value_col_typed->getData().size()); + + if (!vec_res.empty()) + { + // Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) + { + size_t scale = 1; + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else if (scale_arg > 0) + { + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + else + { + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed->getData(), scale, vec_res); + } + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & value_data = value_col_typed->getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + } + } + } + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); if (scale_col == nullptr || isColumnConst(*scale_col)) { + vec_res.resize(1); auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); if (scale_arg == 0) { size_t scale = 1; - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else if (scale_arg > 0) { size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } else { size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); } } /// Non-const scale argument: else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { - const auto & value_data = value_col_typed.getData(); const auto & scale_data = scale_col_typed->getData(); - const size_t rows = value_data.size(); + const size_t rows = scale_data.size(); + + vec_res.resize(rows); for (size_t i = 0; i < rows; ++i) { @@ -561,23 +627,23 @@ struct Dispatcher if (raw_scale == 0) { size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else if (raw_scale > 0) { size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } else { size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); } } } + return col_res; } - - return col_res; + return nullptr; } }; @@ -589,24 +655,64 @@ public: template static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & value_col_typed = checkAndGetColumn>(*value_col); - const typename ColumnDecimal::Container & vec_src = value_col_typed.getData(); - - auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); - 
auto & vec_res = col_res->getData(); - - if (!vec_res.empty()) + // Non-const value argument: + const auto * value_col_typed = checkAndGetColumn>(value_col); + if (value_col_typed) { + const typename ColumnDecimal::Container & vec_src = value_col_typed->getData(); + + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed->getScale()); + auto & vec_res = col_res->getData(); + vec_res.resize(vec_src.size()); + + if (!vec_res.empty()) + { + /// Const scale argument: + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(vec_src, value_col_typed->getScale(), vec_res, scale_arg); + } + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } + } + } + + return col_res; + } + // Const value argument: + const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); + if (value_col_typed_const) + { + auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); + const auto & value_data = value_col_typed_const->template getValue(); + // Const scale argument: if (scale_col == nullptr || isColumnConst(*scale_col)) { + auto col_res = ColumnDecimal::create(1, col->getScale()); auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); + return col_res; } - /// Non-const scale argument - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + /// Non-const scale argument: + if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { const auto & scale = scale_col_typed->getData(); - const size_t rows = vec_src.size(); + const size_t rows = scale.size(); + auto col_res = ColumnDecimal::create(rows, col->getScale()); for (size_t i = 0; i < rows; ++i) { @@ -614,13 +720,13 @@ public: validateScale(scale64); Scale raw_scale = scale64; - DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), + DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } + return col_res; } } - - return col_res; + return nullptr; } }; @@ -671,9 +777,6 @@ public: using ScaleTypes = std::decay_t; using ScaleType = typename ScaleTypes::RightType; - if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); - res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); return true; }; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference index 9ad25ed466a..e0c9b6959ee 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.reference +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -2162,4 +2162,17 @@ CHECKPOINT2 10 1.6275 1.6275 1.6275 1.6275 1 1 +3 +3.1 +3.14 +3.142 +3.1416 +3.14159 +3.141593 +3.1415927 +3.14159265 +3.141592654 +42 +42.4 +42.42 1 diff --git 
a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql index 229f705808d..adae36564b8 100644 --- a/tests/queries/0_stateless/03165_round_scale_as_column.sql +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -118,6 +118,7 @@ DROP TABLE tab; SELECT round(1, 1); SELECT round(materialize(1), materialize(1)); -SELECT round(1, materialize(1)); --{serverError ILLEGAL_COLUMN} +SELECT round(pi(), number) FROM numbers(10); +SELECT round(toDecimal32(42.42, 2), number) from numbers(3); SELECT round(materialize(1), 1); SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} From 2ce564daa0a7eb98b46dbed50d2eddfff8a731c2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 13:05:17 +0200 Subject: [PATCH 143/273] Make 01006_simpod_empty_part_single_column_write.sh always use vertical merge --- .../0_stateless/01006_simpod_empty_part_single_column_write.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " From a2626037bc6ed631758c364edcc096c983805b0c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:15:59 +0200 Subject: [PATCH 
144/273] Improve object storage tags in tests --- docker/test/stateful/run.sh | 4 ++++ docker/test/stateless/run.sh | 2 +- docker/test/stress/run.sh | 1 - tests/ci/stress_check.py | 3 +++ tests/clickhouse-test | 18 +++++++++++++++--- ...6_replace_partition_from_table_zookeeper.sh | 2 +- .../00632_get_sample_block_cache.sql | 2 +- ...0731_long_merge_tree_select_opened_files.sh | 2 +- ...3_system_columns_and_system_tables_long.sql | 2 +- .../0_stateless/00763_lock_buffer_long.sh | 2 +- .../01070_mutations_with_dependencies.sql | 2 +- .../01078_merge_tree_read_one_thread.sql | 2 +- .../01200_mutations_memory_consumption.sql | 2 +- .../0_stateless/01221_system_settings.sql | 2 +- .../0_stateless/01275_parallel_mv.sql.j2 | 4 ++-- .../01281_group_by_limit_memory_tracking.sh | 2 +- .../0_stateless/01293_optimize_final_force.sh | 2 +- .../0_stateless/01304_direct_io_long.sh | 2 +- .../01343_min_bytes_to_use_mmap_io.sql | 2 +- .../01344_min_bytes_to_use_mmap_io_index.sql | 2 +- .../0_stateless/01475_read_subcolumns.sql | 2 +- .../01475_read_subcolumns_storages.sh | 2 +- ...ce_condition_rename_clear_zookeeper_long.sh | 2 +- ...2_execute_merges_on_single_replica_long.sql | 2 +- .../0_stateless/01533_multiple_nested.sql | 2 +- .../01551_mergetree_read_in_order_spread.sql | 2 +- ...1605_adaptive_granularity_block_borders.sql | 4 ++-- .../01643_merge_tree_fsync_smoke.sql | 2 +- ...01643_replicated_merge_tree_fsync_smoke.sql | 2 +- ...5_normalize_create_alter_function_names.sql | 2 +- .../01810_max_part_removal_threads_long.sh | 2 +- .../02226_filesystem_cache_profile_events.sh | 2 +- .../02228_merge_tree_insert_memory_usage.sql | 4 ++-- ...33_optimize_aggregation_in_order_prefix.sql | 2 +- ...filesystem_cache_bypass_cache_threshold.sql | 2 +- .../02240_filesystem_query_cache.sql | 2 +- .../02240_system_filesystem_cache_table.sh | 2 +- ...241_filesystem_cache_on_write_operations.sh | 2 +- .../02242_system_filesystem_cache_log_table.sh | 2 +- 
.../0_stateless/02263_lazy_mark_load.sh | 2 +- .../0_stateless/02286_drop_filesystem_cache.sh | 2 +- .../02313_filesystem_cache_seeks.sh | 2 +- .../0_stateless/02336_sparse_columns_s3.sql | 2 +- .../0_stateless/02343_aggregation_pipeline.sql | 2 +- ..._with_external_aggregation_memory_usage.sql | 2 +- .../0_stateless/02361_fsync_profile_events.sh | 4 ++-- .../02381_client_prints_server_side_time.sh | 2 +- .../02454_create_table_with_custom_disk.sql | 2 +- .../02497_trace_events_stress_long.sh | 2 +- ...3_cache_on_write_with_small_segment_size.sh | 2 +- .../02521_aggregation_by_partitions.sql | 2 +- .../0_stateless/02532_send_logs_level_test.sh | 4 ++-- ...4_fix_grouping_sets_predicate_push_down.sql | 2 +- .../02560_vertical_merge_memory_usage.sql | 2 +- .../02582_async_reading_with_small_limit.sql | 2 +- .../02703_max_local_read_bandwidth.sh | 2 +- .../02703_max_local_write_bandwidth.sh | 2 +- .../0_stateless/02704_max_backup_bandwidth.sh | 2 +- .../0_stateless/02725_memory-for-merges.sql | 2 +- .../02731_zero_objects_in_metadata.sh | 2 +- ...stem_parts_columns_modification_time.sql.j2 | 4 ++-- .../02808_filesystem_cache_drop_query.sh | 2 +- .../02833_multiprewhere_extra_column.sql | 2 +- ...artition_with_duplicated_parts_zookeeper.sh | 2 +- ...933_change_cache_setting_without_restart.sh | 2 +- ...dynamically_change_filesystem_cache_size.sh | 2 +- .../03008_local_plain_rewritable.sh | 2 +- ...32_dynamically_resize_filesystem_cache_2.sh | 2 +- 68 files changed, 92 insertions(+), 74 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 09a9f51084b..2215ac2b37c 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -213,6 +213,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--azure-blob-storage') + fi + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ 
"$USE_DATABASE_ORDINARY" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3ce489b9e0e..b56394df97a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -207,7 +207,7 @@ function run_tests() if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # to disable the same tests - ADDITIONAL_OPTIONS+=('--s3-storage') + ADDITIONAL_OPTIONS+=('--azure-blob-storage') # azurite is slow, but with these two settings it can be super slow ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6d121ba4142..96f8ecb2fab 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -211,7 +211,6 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER" stop_server # Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1 export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index bf0281cae68..486bfc25e22 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -30,6 +30,9 @@ def get_additional_envs(check_name: str) -> List[str]: if "azure" in check_name: result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") + if "s3" in check_name: + result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + return result diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 36870d59c3a..c581d35a289 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -700,7 +700,9 @@ class FailureReason(enum.Enum): NO_LONG = "not running long tests" REPLICATED_DB = "replicated-database" NON_ATOMIC_DB = "database engine not Atomic" + OBJECT_STORAGE = "object-storage" S3_STORAGE = "s3-storage" + AZURE_BLOB_STORAGE = "azure-blob-storage" BUILD = "not running for 
current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" @@ -1226,13 +1228,17 @@ class TestCase: elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: + return FailureReason.AZURE_BLOB_STORAGE + elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + return FailureReason.OBJECT_STORAGE elif ( tags - and "no-s3-storage-with-slow-build" in tags - and args.s3_storage + and "no-object-storage-with-slow-build" in tags + and (args.s3_storage or args.azure_blob_storage) and BuildFlags.RELEASE not in args.build_flags ): - return FailureReason.S3_STORAGE + return FailureReason.OBJECT_STORAGE elif tags: for build_flag in args.build_flags: @@ -3099,6 +3105,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--azure-blob-storage", + action="store_true", + default=False, + help="Run tests over azure blob storage", + ) parser.add_argument( "--no-random-settings", action="store_true", diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..13146f2eab0 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 
c54ca0b084e..ae9b6bb7b2c 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-asan +-- Tags: long, no-object-storage, no-asan SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index af746c43da9..5a4fd901f8d 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-tsan +# Tags: long, no-object-storage, no-tsan # no-s3 because read FileOpen metric set -e diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index 4613576cf4e..009fc0bbb9f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage, no-random-merge-tree-settings +-- Tags: long, no-object-storage, no-random-merge-tree-settings SET output_format_pretty_row_numbers = 0; DROP TABLE IF EXISTS check_system_tables; diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 046e4efaa85..2006d43cdd2 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Tags: long, no-object-storage, no-msan, no-asan, no-tsan, no-debug # Some kind of stress test, it doesn't make sense to test in a non-release build set -e diff --git 
a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql index 813ebf3f5a7..4d1cd54306c 100644 --- a/tests/queries/0_stateless/01070_mutations_with_dependencies.sql +++ b/tests/queries/0_stateless/01070_mutations_with_dependencies.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- With s3 policy TTL TO DISK 'default' doesn't work (because we have no default, only 's3') drop table if exists ttl; diff --git a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql index 3a05e4507a2..166f44df2a7 100644 --- a/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql +++ b/tests/queries/0_stateless/01078_merge_tree_read_one_thread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- Output slightly different plan drop table if exists t; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 5019abc38ab..f2d071961ee 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,4 @@ --- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: no-debug, no-parallel, long, no-object-storage, no-random-settings, no-random-merge-tree-settings SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01221_system_settings.sql b/tests/queries/0_stateless/01221_system_settings.sql index fcffd6c45fe..da0204b37bd 100644 --- a/tests/queries/0_stateless/01221_system_settings.sql +++ b/tests/queries/0_stateless/01221_system_settings.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage select * from system.settings where name = 
'send_timeout'; select * from system.merge_tree_settings order by length(description) limit 1; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 047b1cc3ee7..9d74474c1a4 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -1,5 +1,5 @@ --- Tags: no-s3-storage, no-parallel, no-fasttest --- no-s3-storage: s3 has 20 more threads +-- Tags: no-object-storage, no-parallel, no-fasttest +-- no-object-storage: s3 has 20 more threads -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index e83e49dffef..33b8f413fd5 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage, no-msan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-object-storage, no-msan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index d3d3d3e1ac5..e838af8af9b 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long, no-debug, no-s3-storage +# Tags: no-fasttest, long, no-debug, no-object-storage # This test is too slow with S3 storage and debug modes. 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 97148dc268e..2e27c2f7728 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 614629351ef..15c9ec16700 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01343; CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01343 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql index 2e5ec563641..76cb535dcb7 100644 --- a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_01344; CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_01344 VALUES ('Hello, world'); diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index 8d4e3cb779b..d6eec2f84a1 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ 
b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 5a30f9e0f08..f74f6755e59 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index c3c87eeaf8b..6098c826e32 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-s3-storage +# Tags: race, zookeeper, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 49ef9d8b79f..e53f4476ec6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -1,4 +1,4 @@ --- Tags: long, replica, no-replicated-database, no-parallel, no-s3-storage +-- Tags: long, replica, no-replicated-database, no-parallel, no-object-storage -- Tag no-replicated-database: Fails due to additional replicas or shards -- Tag no-parallel: static zk path diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql 
b/tests/queries/0_stateless/01533_multiple_nested.sql index 1a6f0ec395e..80e9fc7e2fb 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index 95b46c69e83..b5ece08196e 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 187ff5c37e1..9b96ce3e586 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,6 +1,6 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-s3-storage +-- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage -- no-tsan: too slow --- no-s3-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher +-- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher SET use_uncompressed_cache = 0; SET allow_prefetched_read_pool_for_remote_filesystem=0; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index dfc761e1764..f7622bcf98f 100644 --- 
a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists data_01643; diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index 54c30fa2b1a..992cc687c88 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-s3-storage +-- Tags: no-parallel, no-object-storage -- no-parallel -- for flaky check and to avoid "Removing leftovers from table" (for other tables) -- Temporarily skip warning 'table was created by another server at the same moment, will retry' diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index be0f7e8b710..921d28e6399 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -1,4 +1,4 @@ --- Tags: zookeeper, no-replicated-database, no-parallel, no-s3-storage +-- Tags: zookeeper, no-replicated-database, no-parallel, no-object-storage drop table if exists x; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index 3782a7d3ad6..c38fc505fa8 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-object-storage # Because parallel parts removal disabled for s3 storage # NOTE: this done as not .sql since we need to Ordinary database diff --git 
a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 9d87542d84d..d0e61541b15 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings, no-replicated-database +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings, no-replicated-database # set -x diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index ca1ee2738c7..6d86d995143 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,5 +1,5 @@ --- Tags: long, no-parallel, no-s3-storage --- no-s3-storage: Avoid flakiness due to cache / buffer usage +-- Tags: long, no-parallel, no-object-storage +-- no-object-storage: Avoid flakiness due to cache / buffer usage SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush diff --git a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql index 8bc75040e5a..48af5ae0031 100644 --- a/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql +++ b/tests/queries/0_stateless/02233_optimize_aggregation_in_order_prefix.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql 
b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index ee92931ec54..b791ee18e82 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index a609702f22a..40c80e04697 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +-- Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings -- { echo } diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 57b8cec7864..8faf0a08f1f 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 1028fba76f5..f8e7b7e7e72 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set 
-x diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 7a665d81eab..fe016f5a27f 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index 5f80d9d7f6d..f1602e47e01 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-parallel +# Tags: no-object-storage, no-random-settings, no-parallel set -eo pipefail CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a2c9352b7aa..32c9e9cb060 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index fbaec1ffaa7..b54e3d7f805 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: long, no-fasttest, no-parallel, no-object-storage, 
no-random-settings # set -x diff --git a/tests/queries/0_stateless/02336_sparse_columns_s3.sql b/tests/queries/0_stateless/02336_sparse_columns_s3.sql index bf4622adedc..1dc1e980846 100644 --- a/tests/queries/0_stateless/02336_sparse_columns_s3.sql +++ b/tests/queries/0_stateless/02336_sparse_columns_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest, no-s3-storage +-- Tags: no-parallel, no-fasttest, no-object-storage DROP TABLE IF EXISTS t_sparse_s3; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index d73ac66763e..0f9dbd0247d 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage -- produces different pipeline if enabled set enable_memory_bound_merging_of_aggregation_results = 0; diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index a5a3da82324..105fb500461 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +-- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index e150d70b896..98c9cf9b7b4 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-merge-tree-settings -# Tag no-s3-storage: s3 does not have fsync +# Tags: 
no-object-storage, no-random-merge-tree-settings +# Tag no-object-storage: s3 does not have fsync CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh index e6cd63da95d..81376ee3791 100755 --- a/tests/queries/0_stateless/02381_client_prints_server_side_time.sh +++ b/tests/queries/0_stateless/02381_client_prints_server_side_time.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-s3-storage +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql index 6cb1c0774aa..a2d46cf6d1b 100644 --- a/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql +++ b/tests/queries/0_stateless/02454_create_table_with_custom_disk.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-replicated-database +-- Tags: no-object-storage, no-replicated-database DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index c111ed40a29..dfd2f12b55b 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-s3-storage, no-fasttest, no-replicated-database +# Tags: long, no-parallel, no-tsan, no-asan, no-debug, no-object-storage, no-fasttest, no-replicated-database set -e diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 
4f3fd0e54f6..5aeab4c746e 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings +# Tags: no-parallel, no-fasttest, no-object-storage, no-random-settings CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 55723360c38..b4d31e234d8 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..71f42e2a6db 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-debug -# - no-s3-storage - S3 has additional logging +# Tags: no-object-storage, no-debug +# - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql index 0891f1aa8a2..f926b9037d2 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage DROP TABLE IF EXISTS test_grouping_sets_predicate; diff --git 
a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql index 785fb10f70b..361305bac6d 100644 --- a/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql +++ b/tests/queries/0_stateless/02560_vertical_merge_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage drop table if exists tvm; create table tvm (c0 UInt64, c1 UInt64, c2 UInt64, c3 UInt64, c4 UInt64, c5 UInt64, c6 UInt64, c7 UInt64, c8 UInt64, c9 UInt64, c10 UInt64, c11 UInt64, c12 UInt64, c13 UInt64, c14 UInt64, c15 UInt64, c16 UInt64, c17 UInt64, c18 UInt64, c19 UInt64, c20 UInt64, c21 UInt64, c22 UInt64, c23 UInt64, c24 UInt64, c25 UInt64, c26 UInt64, c27 UInt64, c28 UInt64, c29 UInt64, c30 UInt64, c31 UInt64, c32 UInt64, c33 UInt64, c34 UInt64, c35 UInt64, c36 UInt64, c37 UInt64, c38 UInt64, c39 UInt64, c40 UInt64, c41 UInt64, c42 UInt64, c43 UInt64, c44 UInt64, c45 UInt64, c46 UInt64, c47 UInt64, c48 UInt64, c49 UInt64, c50 UInt64, c51 UInt64, c52 UInt64, c53 UInt64, c54 UInt64, c55 UInt64, c56 UInt64, c57 UInt64, c58 UInt64, c59 UInt64, c60 UInt64, c61 UInt64, c62 UInt64, c63 UInt64, c64 UInt64, c65 UInt64, c66 UInt64, c67 UInt64, c68 UInt64, c69 UInt64, c70 UInt64, c71 UInt64, c72 UInt64, c73 UInt64, c74 UInt64, c75 UInt64, c76 UInt64, c77 UInt64, c78 UInt64, c79 UInt64, c80 UInt64, c81 UInt64, c82 UInt64, c83 UInt64, c84 UInt64, c85 UInt64, c86 UInt64, c87 UInt64, c88 UInt64, c89 UInt64, c90 UInt64, c91 UInt64, c92 UInt64, c93 UInt64, c94 UInt64, c95 UInt64, c96 UInt64, c97 UInt64, c98 UInt64, c99 UInt64, c100 UInt64, c101 UInt64, c102 UInt64, c103 UInt64, c104 UInt64, c105 UInt64, c106 UInt64, c107 UInt64, c108 UInt64, c109 UInt64, c110 UInt64, c111 UInt64, c112 UInt64, c113 UInt64, c114 UInt64, c115 UInt64, c116 UInt64, c117 UInt64, c118 UInt64, c119 UInt64, c120 UInt64, c121 UInt64, c122 UInt64, c123 UInt64, c124 UInt64, c125 UInt64, c126 UInt64, c127 UInt64, c128 UInt64, c129 
UInt64, c130 UInt64, c131 UInt64, c132 UInt64, c133 UInt64, c134 UInt64, c135 UInt64, c136 UInt64, c137 UInt64, c138 UInt64, c139 UInt64, c140 UInt64, c141 UInt64, c142 UInt64, c143 UInt64, c144 UInt64, c145 UInt64, c146 UInt64, c147 UInt64, c148 UInt64, c149 UInt64, c150 UInt64, c151 UInt64, c152 UInt64, c153 UInt64, c154 UInt64, c155 UInt64, c156 UInt64, c157 UInt64, c158 UInt64, c159 UInt64, c160 UInt64, c161 UInt64, c162 UInt64, c163 UInt64, c164 UInt64, c165 UInt64, c166 UInt64, c167 UInt64, c168 UInt64, c169 UInt64, c170 UInt64, c171 UInt64, c172 UInt64, c173 UInt64, c174 UInt64, c175 UInt64, c176 UInt64, c177 UInt64, c178 UInt64, c179 UInt64, c180 UInt64, c181 UInt64, c182 UInt64, c183 UInt64, c184 UInt64, c185 UInt64, c186 UInt64, c187 UInt64, c188 UInt64, c189 UInt64, c190 UInt64, c191 UInt64, c192 UInt64, c193 UInt64, c194 UInt64, c195 UInt64, c196 UInt64, c197 UInt64, c198 UInt64, c199 UInt64, c200 UInt64, c201 UInt64, c202 UInt64, c203 UInt64, c204 UInt64, c205 UInt64, c206 UInt64, c207 UInt64, c208 UInt64, c209 UInt64, c210 UInt64, c211 UInt64, c212 UInt64, c213 UInt64, c214 UInt64, c215 UInt64, c216 UInt64, c217 UInt64, c218 UInt64, c219 UInt64, c220 UInt64, c221 UInt64, c222 UInt64, c223 UInt64, c224 UInt64, c225 UInt64, c226 UInt64, c227 UInt64, c228 UInt64, c229 UInt64, c230 UInt64, c231 UInt64, c232 UInt64, c233 UInt64, c234 UInt64, c235 UInt64, c236 UInt64, c237 UInt64, c238 UInt64, c239 UInt64, c240 UInt64, c241 UInt64, c242 UInt64, c243 UInt64, c244 UInt64, c245 UInt64, c246 UInt64, c247 UInt64, c248 UInt64, c249 UInt64, c250 UInt64, c251 UInt64, c252 UInt64, c253 UInt64, c254 UInt64, c255 UInt64, c256 UInt64, c257 UInt64, c258 UInt64, c259 UInt64, c260 UInt64, c261 UInt64, c262 UInt64, c263 UInt64, c264 UInt64, c265 UInt64, c266 UInt64, c267 UInt64, c268 UInt64, c269 UInt64, c270 UInt64, c271 UInt64, c272 UInt64, c273 UInt64, c274 UInt64, c275 UInt64, c276 UInt64, c277 UInt64, c278 UInt64, c279 UInt64, c280 UInt64, c281 UInt64, c282 UInt64, 
c283 UInt64, c284 UInt64, c285 UInt64, c286 UInt64, c287 UInt64, c288 UInt64, c289 UInt64, c290 UInt64, c291 UInt64, c292 UInt64, c293 UInt64, c294 UInt64, c295 UInt64, c296 UInt64, c297 UInt64, c298 UInt64, c299 UInt64) engine = MergeTree order by tuple() settings min_rows_for_wide_part = 10, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1; diff --git a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql index cb6b1b6083e..406cab82183 100644 --- a/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql +++ b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-object-storage SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index c78cd202f1b..6f43c1ae869 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 31cf6e9606e..4f6a300c5b3 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage +# Tags: no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 748bf856deb..8cb03a93a7a 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings +# Tags: no-object-storage, no-random-settings, no-random-merge-tree-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 1a8402dff4b..8e4d4f5b3e0 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index eef52002e36..78659b70129 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-object-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 index eee236ff681..1ca5cc0bb7e 100644 --- a/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 +++ b/tests/queries/0_stateless/02806_system_parts_columns_modification_time.sql.j2 @@ -1,5 +1,5 @@ --- 
Tags: no-s3-storage --- Tag: no-s3-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) +-- Tags: no-object-storage +-- Tag: no-object-storage because S3 updates metadata for the virtual link file on metadata disk (see CreateHardlinkOperation::execute() for details) set mutations_sync=1; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh index b44f9e50513..8a4a2e906b0 100755 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index 3a751294cba..da2f050cf38 100644 --- a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-object-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; diff --git a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh index edfed206d87..07d2ee27d22 100755 --- a/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh +++ b/tests/queries/0_stateless/02864_replace_partition_with_duplicated_parts_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-s3-storage +# Tags: zookeeper, no-object-storage # 
Because REPLACE PARTITION does not forces immediate removal of replaced data parts from local filesystem # (it tries to do it as quick as possible, but it still performed in separate thread asynchronously) diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index ddad7a1904b..76ada756f47 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage +# Tags: no-fasttest, no-parallel, no-object-storage CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 2e344a6b6e5..6f454da40da 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 5fac964a219..d51e180efc9 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-settings, no-s3-storage, no-replicated-database, no-shared-merge-tree +# Tags: no-random-settings, no-object-storage, no-replicated-database, no-shared-merge-tree # Tag no-random-settings: enable after root causing 
flakiness CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 526c4f84030..09bdd7f6b56 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings +# Tags: no-fasttest, no-parallel, no-object-storage, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From ebacab6c986fd4bbd98ebf4761f082948d22b3cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:17:58 +0200 Subject: [PATCH 145/273] Bump From 452201caf943451f15cc14fb6dfb21de33166376 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 14:21:39 +0200 Subject: [PATCH 146/273] Black --- tests/clickhouse-test | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c581d35a289..8486e3a885f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1230,7 +1230,11 @@ class TestCase: return FailureReason.S3_STORAGE elif tags and ("no-azure-blob-storage" in tags) and args.azure_blob_storage: return FailureReason.AZURE_BLOB_STORAGE - elif tags and ("no-object-storage" in tags) and (args.azure_blob_storage or args.s3_storage): + elif ( + tags + and ("no-object-storage" in tags) + and (args.azure_blob_storage or args.s3_storage) + ): return FailureReason.OBJECT_STORAGE elif ( tags From 70a2061c9bccd85f6c939529609051c06042c563 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 12:27:01 +0000 Subject: [PATCH 147/273] Fixed bug and added test --- src/Disks/ObjectStorages/IObjectStorage.h | 1 + .../StorageObjectStorageSource.cpp | 41 ++++--- .../StorageObjectStorageSource.h | 6 +- 
.../03036_reading_s3_archives.reference | 104 +++++++++--------- .../0_stateless/03036_reading_s3_archives.sql | 30 ++--- 5 files changed, 96 insertions(+), 86 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 9f5c14fdb7c..6410a9a7a73 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..9436e729683 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -196,13 +196,12 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, read_from_format_info.requested_virtual_columns, - { - .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified - }); + chunk, + read_from_format_info.requested_virtual_columns, + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); return chunk; } @@ -690,10 +689,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -732,6 +730,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -756,6 +755,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -769,15 +770,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return 
object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..2cbe8a9776c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -259,7 +259,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -278,9 +279,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv 
test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv 
test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 
example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 00d7cc25e1a..43bda4ee704 100644 --- 
a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, 
_file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: 
nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) From f2244853164906cbf6faf186b7f3782ccc504d5a Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 2 Jul 2024 13:01:33 +0000 Subject: [PATCH 148/273] Add reference to documentation --- docs/en/sql-reference/table-functions/s3.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 1a7e2b8d66a..35e5d86034c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -269,9 +269,9 @@ FROM s3( ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. Type: `LowCardinalty(String)`. -- `_file` — Name of the file. Type: `LowCardinalty(String)`. -- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_path` — Path to the file. Type: `LowCardinalty(String)`. In case of archive, shows path in a format: "{path_to_archive}::{path_to_file_inside_archive}" +- `_file` — Name of the file. Type: `LowCardinalty(String)`. In case of archive shows name of the file inside the archive. +- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
## Storage Settings {#storage-settings} From 06e235024f7b33c21be1ef2dc6210b40aabe7921 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:16:57 +0200 Subject: [PATCH 149/273] work with review --- .../Transforms/ApplySquashingTransform.h | 2 +- .../DeduplicationTokenTransforms.cpp | 29 ++++++++----------- .../Transforms/DeduplicationTokenTransforms.h | 19 ++++++------ .../Transforms/SquashingTransform.cpp | 8 ++--- .../Transforms/buildPushingToViewsChain.cpp | 16 +++++----- src/Server/TCPHandler.cpp | 4 +-- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 ++++--- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++-- src/Storages/MergeTree/MutateTask.cpp | 4 +-- .../MergeTree/ReplicatedMergeTreeSink.cpp | 5 ++-- src/Storages/WindowView/StorageWindowView.cpp | 6 ++-- 12 files changed, 55 insertions(+), 56 deletions(-) diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 94b890198d4..49a6581e685 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -32,7 +32,7 @@ public: protected: void onConsume(Chunk chunk) override { - cur_chunk = DB::Squashing::squash(std::move(chunk)); + cur_chunk = Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 374a6495f79..f50e69e730f 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -56,7 +56,7 @@ String TokenInfo::debugToken() const void TokenInfo::addChunkHash(String part) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; if (stage != DEFINE_SOURCE_WITH_HASHES) @@ -65,7 +65,7 @@ void TokenInfo::addChunkHash(String part) 
addTokenPart(std::move(part)); } -void TokenInfo::defineSourceWithChunkHashes() +void TokenInfo::finishChunkHashes() { if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_WITH_HASHES; @@ -78,7 +78,7 @@ void TokenInfo::defineSourceWithChunkHashes() void TokenInfo::setUserToken(const String & token) { - if (stage == UNDEFINED) + if (stage == UNDEFINED && empty()) stage = DEFINE_SOURCE_USER_TOKEN; if (stage != DEFINE_SOURCE_USER_TOKEN) @@ -87,7 +87,7 @@ void TokenInfo::setUserToken(const String & token) addTokenPart(fmt::format("user-token-{}", token)); } -void TokenInfo::defineSourceWithUserToken(size_t block_number) +void TokenInfo::setSourceWithUserToken(size_t block_number) { if (stage != DEFINE_SOURCE_USER_TOKEN) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -108,7 +108,7 @@ void TokenInfo::setViewID(const String & id) addTokenPart(fmt::format("view-id-{}", id)); } -void TokenInfo::defineViewID(size_t block_number) +void TokenInfo::setViewBlockNumber(size_t block_number) { if (stage != DEFINE_VIEW) throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); @@ -138,6 +138,7 @@ size_t TokenInfo::getTotalSize() const for (const auto & part : parts) size += part.size(); + // we reserve more size here to be able to add delimenter between parts. 
return size + parts.size() - 1; } @@ -149,17 +150,11 @@ void CheckTokenTransform::transform(Chunk & chunk) if (!token_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - if (!must_be_present) - { - LOG_DEBUG(log, "{}, no token required, token {}", debug, token_info->debugToken()); - return; - } - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); } #endif -String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -170,20 +165,20 @@ String SetInitialTokenTransform::getChunkHash(const Chunk & chunk) } -void SetInitialTokenTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); if (!token_info) throw Exception( ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetInitialTokenTransform"); + "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); if (token_info->isDefined()) return; token_info->addChunkHash(getChunkHash(chunk)); - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } void SetUserTokenTransform::transform(Chunk & chunk) @@ -203,7 +198,7 @@ void SetSourceBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->defineSourceWithUserToken(block_number++); + token_info->setSourceWithUserToken(block_number++); } void SetViewIDTransform::transform(Chunk & chunk) @@ -223,7 +218,7 @@ void SetViewBlockNumberTransform::transform(Chunk & chunk) throw Exception( ErrorCodes::LOGICAL_ERROR, "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->defineViewID(block_number++); + 
token_info->setViewBlockNumber(block_number++); } void ResetTokenTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 9d087536a38..79d168d1000 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -42,13 +42,13 @@ namespace DeduplicationToken bool isDefined() const { return stage == DEFINED; } void addChunkHash(String part); - void defineSourceWithChunkHashes(); + void finishChunkHashes(); void setUserToken(const String & token); - void defineSourceWithUserToken(size_t block_number); + void setSourceWithUserToken(size_t block_number); void setViewID(const String & id); - void defineViewID(size_t block_number); + void setViewBlockNumber(size_t block_number); void reset(); @@ -98,10 +98,9 @@ namespace DeduplicationToken class CheckTokenTransform : public ISimpleTransform { public: - CheckTokenTransform(String debug_, bool must_be_present_, const Block & header_) + CheckTokenTransform(String debug_, const Block & header_) : ISimpleTransform(header_, header_, true) , debug(std::move(debug_)) - , must_be_present(must_be_present_) { } @@ -112,7 +111,6 @@ namespace DeduplicationToken private: String debug; LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - bool must_be_present = false; }; #endif @@ -134,16 +132,19 @@ namespace DeduplicationToken }; - class SetInitialTokenTransform : public ISimpleTransform + class DefineSourceWithChunkHashesTransform : public ISimpleTransform { public: - explicit SetInitialTokenTransform(const Block & header_) + explicit DefineSourceWithChunkHashesTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } - String getName() const override { return "DeduplicationToken::SetInitialTokenTransform"; } + String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } + 
// Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with heshes from the parts. + // But if there is some table with different engine, we still need to define the source of the data in deduplication token + // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; static String getChunkHash(const Chunk & chunk); diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index e457a262681..1fb4433240a 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,7 +18,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = DB::Squashing::squash(squashing.flush()); + finish_chunk = Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -63,14 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - chunk = DB::Squashing::squash(squashing.add(std::move(chunk))); + chunk = Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = DB::Squashing::squash(squashing.flush()); + chunk = Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 713ab25600f..8d38396ecd5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ 
b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -357,7 +357,7 @@ std::optional generateViewChain( } #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Before squashing", out.getInputHeader())); #endif auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); @@ -403,7 +403,7 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); #endif auto executing_inner_query = std::make_shared( @@ -413,7 +413,7 @@ std::optional generateViewChain( out.addSource(std::move(executing_inner_query)); #ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", !disable_deduplication_for_children, out.getInputHeader())); + out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); #endif } @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, 
elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a705ae2e013..ee38b7242b1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -889,7 +889,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = DB::Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); if (result_chunk) { auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); @@ -901,7 +901,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = DB::Squashing::squash(squashing.flush()); + Chunk result_chunk = Squashing::squash(squashing.flush()); if (!result_chunk) { return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 
63858ce601d..429fd8b67c5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2322,12 +2322,11 @@ String IMergeTreeDataPart::getUniqueId() const return getDataPartStorage().getUniqueId(); } -String IMergeTreeDataPart::getPartBlockIDHash() const +UInt128 IMergeTreeDataPart::getPartBlockIDHash() const { SipHash hash; checksums.computeTotalChecksumDataOnly(hash); - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + return hash.get128(); } String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const @@ -2336,7 +2335,10 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name); if (token.empty()) - return info.partition_id + "_" + getPartBlockIDHash(); + { + const auto hash_value = getPartBlockIDHash(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + } SipHash hash; hash.update(token.data(), token.size()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 204dfdaad0a..dbb1df3cfe8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -210,7 +210,7 @@ public: /// Compute part block id for zero level part. Otherwise throws an exception. 
/// If token is not empty, block id is calculated based on it instead of block data - String getPartBlockIDHash() const; + UInt128 getPartBlockIDHash() const; String getZeroLevelPartBlockID(std::string_view token) const; void setName(const String & new_name); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7bc04c05a1c..4a1163d2317 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -126,7 +126,8 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) @@ -167,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(); @@ -206,7 +207,6 @@ void MergeTreeSink::finishDelayedChunk() if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); - auto res = deduplication_log->addPart(block_id, part->info); if (!res.second) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5da36b6ee3b..3dbcb5e5bda 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1317,7 +1317,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block block_to_squash = projection.calculate(cur_block, ctx->context); projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - Chunk squashed_chunk = DB::Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + Chunk 
squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); if (squashed_chunk) { auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); @@ -1341,7 +1341,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = DB::Squashing::squash(projection_squash_plan.flush()); + auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); if (squashed_chunk) { auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 228b5c596ab..3677f5b02ab 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -374,7 +374,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { chassert(temp_part.part); - token_info->addChunkHash(temp_part.part->getPartBlockIDHash()); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); } } @@ -423,7 +424,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if (!token_info->isDefined()) { - token_info->defineSourceWithChunkHashes(); + token_info->finishChunkHashes(); } finishDelayedChunk(zookeeper); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index ccb6259da00..e36247103c7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1558,7 +1558,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return 
std::make_shared("StorageWindowView: Afrer tmp table before squashing", true, stream_header); + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); }); #endif @@ -1604,7 +1604,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", true, stream_header); + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); }); #endif @@ -1630,7 +1630,7 @@ void StorageWindowView::writeIntoWindowView( #ifdef ABORT_ON_LOGICAL_ERROR builder.addSimpleTransform([&](const Block & stream_header) { - return std::make_shared("StorageWindowView: Before out", true, stream_header); + return std::make_shared("StorageWindowView: Before out", stream_header); }); #endif From f6a2c3156bd81fba8a48a04df5e5095fb8b5a384 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 2 Jul 2024 15:24:29 +0200 Subject: [PATCH 150/273] rename transform --- .../Transforms/DeduplicationTokenTransforms.cpp | 4 ++-- .../Transforms/DeduplicationTokenTransforms.h | 4 ++-- src/Processors/Transforms/buildPushingToViewsChain.cpp | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index f50e69e730f..6786f76cbef 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -154,7 +154,7 @@ void CheckTokenTransform::transform(Chunk & chunk) } #endif -String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) +String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) { SipHash hash; for (const auto & colunm : chunk.getColumns()) @@ -165,7 +165,7 @@ String DefineSourceWithChunkHashesTransform::getChunkHash(const Chunk & chunk) } -void 
DefineSourceWithChunkHashesTransform::transform(Chunk & chunk) +void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) { auto token_info = chunk.getChunkInfos().get(); diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 79d168d1000..94287dc4487 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -132,10 +132,10 @@ namespace DeduplicationToken }; - class DefineSourceWithChunkHashesTransform : public ISimpleTransform + class DefineSourceWithChunkHashTransform : public ISimpleTransform { public: - explicit DefineSourceWithChunkHashesTransform(const Block & header_) + explicit DefineSourceWithChunkHashTransform(const Block & header_) : ISimpleTransform(header_, header_, true) { } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 8d38396ecd5..312b333ab33 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -547,7 +547,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { @@ -555,7 +555,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (dynamic_cast(storage.get())) { @@ -564,7 +564,7 @@ Chain buildPushingToViewsChain( sink->setRuntimeData(thread_status, elapsed_counter_ms); 
result_chain.addSource(std::move(sink)); - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,13 +573,13 @@ Chain buildPushingToViewsChain( metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::make_shared(sink->getHeader())); result_chain.addSource(std::move(sink)); } else { - result_chain.addSource(std::make_shared(storage_header)); + result_chain.addSource(std::make_shared(storage_header)); } if (result_chain.empty()) From ad6ddf634db8318f48b8f5e95d17473a1e5ae3e3 Mon Sep 17 00:00:00 2001 From: jwoodhead Date: Tue, 2 Jul 2024 09:33:15 -0500 Subject: [PATCH 151/273] Include offset argument for lagInFrame and leadInFrame window functions. Fixes #65952 --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..530eaae7283 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. 
+- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. ## Examples From ce19dc5cd91a1424a172959d62d629646dfd7b38 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 2 Jul 2024 14:37:33 +0000 Subject: [PATCH 152/273] fix test --- .../0_stateless/03161_lightweight_delete_projection.reference | 2 +- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 15832d4cdfa..c5a6cbab0bc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,2 @@ -8888 Alice 50 1231 John 33 +8888 Alice 50 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 70a069df1bc..b189388e356 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -26,6 +26,6 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users'); -SELECT * FROM users; +SELECT * FROM users ORDER BY uid; DROP TABLE users; From 8dfa8d6df48e82e321641a239ccd715c4d188c62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 16:42:42 +0200 Subject: [PATCH 153/273] Add more Azure profile events + AzureUploadPart to AzureStageBlock --- src/Common/ProfileEvents.cpp | 8 +++++-- src/Coordination/KeeperConstants.cpp | 10 +++++++-- .../IO/WriteBufferFromAzureBlobStorage.cpp | 21 +++++++++++++++++++ .../copyAzureBlobStorageFile.cpp | 
17 ++++++++++----- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..eaff2cf8856 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -447,14 +447,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API UploadPart calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 51bf037c1c9..76541db6112 100644 --- a/src/Coordination/KeeperConstants.cpp +++ 
b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) \ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index a2d21cf49c2..d1324e22978 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,6 +14,15 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } namespace DB @@ -134,6 +143,10 @@ void WriteBufferFromAzureBlobStorage::preFinalize() /// then we use single part upload instead of multi part upload if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { + ProfileEvents::increment(ProfileEvents::AzureUpload); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); + auto part_data = std::move(detached_part_data.front()); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); @@ -164,6 +177,10 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { auto block_blob_client = 
blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -269,6 +286,10 @@ void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage: auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..43052f661b3 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,14 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -156,6 +160,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + 
ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -259,9 +267,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -333,7 +341,6 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); From 4ac30aa7d578acf00928ac2301fa7b50da9a040f Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Jul 2024 17:18:14 +0200 Subject: [PATCH 154/273] Fxi style --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 43052f661b3..6386c7a3c76 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,12 +16,10 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUpload; extern const Event AzureStageBlock; extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUpload; extern const Event DiskAzureStageBlock; extern const Event DiskAzureCommitBlockList; } From 
b64c1dc711b3f52bfef9f05b13812acd6d683244 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:20:29 +0200 Subject: [PATCH 155/273] Update index.md --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 530eaae7283..01fae9d9040 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x, offset)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x, offset)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. +- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. 
If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type. ## Examples From 2598daa65aab4d58fae1cc5c69ebe9257c189f6b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:29:48 +0200 Subject: [PATCH 156/273] small fix of docs --- docs/en/sql-reference/window-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 01fae9d9040..49076f3cbe1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified physical offset ahead of the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the default value is returned if specified; otherwise, a default value based on the column’s data type is used. -- `leadInFrame(x[, offset[, default]])` - Return a value from the column x evaluated at the row that is a specified number of offset rows ahead of the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the default value is used if specified; otherwise, the function returns a default value based on the column’s data type. 
+- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the previous row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used. ## Examples From b5af73a299986c457ba42f59c0d39a53ab4d9053 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 15:48:10 +0000 Subject: [PATCH 157/273] Better --- src/Client/ClientBase.cpp | 7 ++----- src/Client/ClientBase.h | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 56573c15f32..5d472ba99b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1206,11 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - { + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; - cancelled_printed = true; - } } @@ -1326,7 +1323,7 @@ void ClientBase::onEndOfStream() if (is_interactive) { - if (cancelled && !cancelled_printed) + if (cancelled && !cancelled_printed.exchange(true)) output_stream << "Query was cancelled." << std::endl; else if (!written_first_block) output_stream << "Ok." 
<< std::endl; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 756400137ad..30dc4168996 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -338,8 +338,8 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; - bool cancelled_printed = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; /// Unpacked descriptors and streams for the ease of use. int in_fd = STDIN_FILENO; From 7a7633a2309a5f1286f6120513dd75a54aefa1b6 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 2 Jul 2024 15:17:56 +0000 Subject: [PATCH 158/273] Simplify logic in corner case to avoid comparing doubles The case for > bit_limit is already covered in previous branch, so we just need to cover the other case. This also fixes an overflow that was caused in previous check. e.g. b > B(word_size * n) if sizeof(B) is 1 byte but n is huge --- src/Functions/bitShiftLeft.cpp | 17 ++++++++--------- src/Functions/bitShiftRight.cpp | 16 ++++++++-------- ...ift_throws_error_for_out_of_bounds.reference | 2 ++ ...bit_shift_throws_error_for_out_of_bounds.sql | 2 ++ 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 645672c50e2..99fd11114aa 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -40,13 +40,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8 * sizeof(*pos); size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - - /// To prevent overflow - 
if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.push_back(0); @@ -109,12 +108,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 2e9182d3fe6..bdc193c4be6 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -56,12 +56,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { /// insert default value out_vec.push_back(0); @@ -96,12 +96,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - if (b < 0 || b > 
B(word_size * n)) + const UInt256 bit_limit = word_size * n; + if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size)) + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference index d86bac9de59..33b8cd6ee26 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -1 +1,3 @@ +-- bitShiftRight +-- bitShiftLeft OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql index a1a246593d8..aec01753673 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -1,3 +1,4 @@ +SELECT '-- bitShiftRight'; SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } @@ -5,6 +6,7 @@ SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT '-- bitShiftLeft'; SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError 
ARGUMENT_OUT_OF_BOUND } SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } From 21f0eb2eecab17f6137639dc9162dbc4301d0d95 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jul 2024 18:13:49 +0200 Subject: [PATCH 159/273] Do not use async initialization of readers in s3queue --- .../StorageObjectStorageSource.cpp | 94 ++++++++----- .../StorageObjectStorageSource.h | 30 +++- .../ObjectStorageQueueIFileMetadata.cpp | 10 ++ .../ObjectStorageQueueIFileMetadata.h | 2 +- .../ObjectStorageQueueSource.cpp | 133 ++++++------------ .../ObjectStorageQueueSource.h | 36 ++--- .../StorageObjectStorageQueue.cpp | 41 +----- .../integration/test_storage_s3_queue/test.py | 1 + 8 files changed, 158 insertions(+), 189 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..0f0aae7a6a5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -65,7 +65,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( CurrentMetrics::StorageObjectStorageThreadsActive, CurrentMetrics::StorageObjectStorageThreadsScheduled, 1/* max_threads */)) - , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) @@ -156,20 +155,20 @@ std::shared_ptr StorageObjectStorageSourc return iterator; } -void StorageObjectStorageSource::lazyInitialize(size_t processor) +void StorageObjectStorageSource::lazyInitialize() { if (initialized) return; - reader = createReader(processor); + reader = createReader(); if (reader) - reader_future = createReaderAsync(processor); + reader_future = createReaderAsync(); initialized = true; } Chunk StorageObjectStorageSource::generate() { - lazyInitialize(0); + lazyInitialize(); 
while (true) { @@ -236,27 +235,30 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader() { - const auto cache_key = getKeyForSchemaCache( - getUniqueStoragePathIdentifier(*configuration, object_info), - configuration->format, - format_settings, - getContext()); - - auto get_last_mod_time = [&]() -> std::optional - { - return object_info.metadata - ? std::optional(object_info.metadata->last_modified.epochTime()) - : std::nullopt; - }; - return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); + return createReader( + 0, file_iterator, configuration, object_storage, read_from_format_info, format_settings, + key_condition, getContext(), &schema_cache, log, max_block_size, max_parsing_threads, need_only_count); } -StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count) { ObjectInfoPtr object_info; - auto query_settings = configuration->getQuerySettings(getContext()); + auto query_settings = configuration->getQuerySettings(context_); do { @@ -277,9 +279,29 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::shared_ptr source; std::unique_ptr read_buf; + auto try_get_num_rows_from_cache = [&]() -> 
std::optional + { + if (!schema_cache) + return std::nullopt; + + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, *object_info), + configuration->format, + format_settings, + context_); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache->tryGetNumRows(cache_key, get_last_mod_time); + }; + std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(*object_info) + && context_->getSettingsRef().use_cache_for_count_from_files + ? try_get_num_rows_from_cache() : std::nullopt; if (num_rows_from_cache) @@ -304,14 +326,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); - read_buf = createReadBuffer(*object_info); + read_buf = createReadBuffer(*object_info, object_storage, context_, log); } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), + context_, max_block_size, format_settings, need_only_count ? 
1 : max_parsing_threads, @@ -320,20 +342,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade compression_method, need_only_count); - if (key_condition) - input_format->setKeyCondition(key_condition); + if (key_condition_) + input_format->setKeyCondition(key_condition_); if (need_only_count) input_format->needOnlyCount(); builder.init(Pipe(input_format)); - if (columns_desc.hasDefaults()) + if (read_from_format_info.columns_description.hasDefaults()) { builder.addSimpleTransform( [&](const Block & header) { - return std::make_shared(header, columns_desc, *input_format, getContext()); + return std::make_shared(header, read_from_format_info.columns_description, *input_format, context_); }); } @@ -356,21 +378,25 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); } -std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +std::future StorageObjectStorageSource::createReaderAsync() { - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +std::unique_ptr StorageObjectStorageSource::createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log) { const auto & object_size = object_info.metadata->size_bytes; - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context_->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = 
read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettings().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d93097d2636..c2bfff4b997 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -75,7 +75,6 @@ protected: const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; - ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; @@ -116,13 +115,32 @@ protected: std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. 
- ReaderHolder createReader(size_t processor = 0); - std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + static ReaderHolder createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count); + + ReaderHolder createReader(); + + std::future createReaderAsync(); + static std::unique_ptr createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log); void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); - void lazyInitialize(size_t processor); + void lazyInitialize(); }; class StorageObjectStorageSource::IIterator diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 52ee0c9f8ed..6fac519849d 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -62,6 +62,11 @@ void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & e last_exception = exception; } +void ObjectStorageQueueIFileMetadata::FileStatus::updateState(State state_) +{ + state = state_; +} + std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); @@ -224,9 +229,14 @@ bool ObjectStorageQueueIFileMetadata::setProcessing() auto [success, file_state] = setProcessingImpl(); if (success) + { 
file_status->onProcessing(); + } else + { + LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); + } LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", path, file_state, success ? "" : "not ", diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index 652b4742389..920beaa6f21 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -23,7 +23,7 @@ public: void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); - void updateState(State state_) { state = state_; } + void updateState(State state_); std::string getException() const; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 955e49bc2bf..683a7038bb6 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -357,41 +357,38 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, 
std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_) - : ISource(header_) + : ISource(read_from_format_info_.source_header) , WithContext(context_) , name(std::move(name_)) , processor_id(processor_id_) - , action(action_) + , file_iterator(file_iterator_) + , configuration(configuration_) + , object_storage(object_storage_) + , read_from_format_info(read_from_format_info_) + , format_settings(format_settings_) + , queue_settings(queue_settings_) , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) + , max_block_size(max_block_size_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) , system_queue_log(system_queue_log_) , storage_id(storage_id_) - , max_processed_files_before_commit(max_processed_files_before_commit_) - , max_processed_rows_before_commit(max_processed_rows_before_commit_) - , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) - , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) , commit_once_processed(commit_once_processed_) - , remove_file_func(remove_file_func_) , log(log_) { } @@ -401,21 +398,6 @@ String ObjectStorageQueueSource::getName() const return name; } -void ObjectStorageQueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - LOG_TEST(log, "Initializing a new reader"); - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - - initialized = true; -} - Chunk ObjectStorageQueueSource::generate() { Chunk chunk; @@ -440,14 +422,21 @@ Chunk ObjectStorageQueueSource::generate() Chunk 
ObjectStorageQueueSource::generateImpl() { - lazyInitialize(processor_id); - - while (true) + while (!shutdown_called) { if (!reader) { - LOG_TEST(log, "No reader"); - break; + const auto context = getContext(); + reader = StorageObjectStorageSource::createReader( + processor_id, file_iterator, configuration, object_storage, read_from_format_info, + format_settings, nullptr, context, nullptr, log, max_block_size, + context->getSettingsRef().max_parsing_threads.value, /* need_only_count */false); + + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } } const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); @@ -528,7 +517,7 @@ Chunk ObjectStorageQueueSource::generateImpl() total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, requested_virtual_columns, + chunk, read_from_format_info.requested_virtual_columns, { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes @@ -547,9 +536,6 @@ Chunk ObjectStorageQueueSource::generateImpl() if (processed_rows_from_file == 0) { - auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); - chassert(file_iterator); - if (file_status->retries < file_metadata->getMaxTries()) file_iterator->returnForRetry(reader.getObjectInfo()); @@ -564,11 +550,13 @@ Chunk ObjectStorageQueueSource::generateImpl() file_status->setProcessingEndTime(); file_status.reset(); + reader = {}; processed_rows_from_file = 0; processed_files.push_back(file_metadata); - if (processed_files.size() == max_processed_files_before_commit) + if (queue_settings.max_processed_files_before_commit + && processed_files.size() == queue_settings.max_processed_files_before_commit) { LOG_TRACE(log, "Number of max processed files before commit reached " "(rows: {}, bytes: {}, files: {})", @@ -576,68 +564,30 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - bool rows_or_bytes_or_time_limit_reached = false; - if (max_processed_rows_before_commit - && 
total_processed_rows == max_processed_rows_before_commit) + if (queue_settings.max_processed_rows_before_commit + && total_processed_rows == queue_settings.max_processed_rows_before_commit) { LOG_TRACE(log, "Number of max processed rows before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processed_bytes_before_commit - && total_processed_bytes == max_processed_bytes_before_commit) + else if (queue_settings.max_processed_bytes_before_commit + && total_processed_bytes == queue_settings.max_processed_bytes_before_commit) { LOG_TRACE(log, "Number of max processed bytes before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processing_time_sec_before_commit - && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + else if (queue_settings.max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= queue_settings.max_processing_time_sec_before_commit) { LOG_TRACE(log, "Max processing time before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; - } - - if (rows_or_bytes_or_time_limit_reached) - { - if (!reader_future.valid()) - break; - - LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " - "will process it despite the limit"); - } - - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - { - LOG_TEST(log, "Reader finished"); - break; - } - - file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - - if 
(!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } } return {}; @@ -681,12 +631,11 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { - switch (action) + switch (queue_settings.after_processing.value) { case ObjectStorageQueueAction::DELETE: { - assert(remove_file_func); - remove_file_func(path); + object_storage->removeObject(StoredObject(path)); break; } case ObjectStorageQueueAction::KEEP: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index ccd87e8a269..fce2a426ecb 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -97,22 +97,20 @@ public: ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - 
size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -128,29 +126,27 @@ public: private: const String name; const size_t processor_id; - const ObjectStorageQueueAction action; + const std::shared_ptr file_iterator; + const ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const ReadFromFormatInfo read_from_format_info; + const std::optional format_settings; + const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; + const size_t max_block_size; + const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; const std::shared_ptr system_queue_log; const StorageID storage_id; - const size_t max_processed_files_before_commit; - const size_t max_processed_rows_before_commit; - const size_t max_processed_bytes_before_commit; - const size_t max_processing_time_sec_before_commit; const bool commit_once_processed; - RemoveFileFunc remove_file_func; LoggerPtr log; std::vector processed_files; std::vector failed_during_read_files; Source::ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; size_t processed_rows_from_file = 0; size_t total_processed_rows = 0; @@ -165,8 +161,6 @@ private: ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - - void lazyInitialize(size_t processor); }; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 95265cde9ea..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -352,43 +352,14 @@ std::shared_ptr StorageObjectStorageQueue::createSourc 
ContextPtr local_context, bool commit_once_processed) { - auto internal_source = std::make_unique( - getName(), - object_storage, - configuration, - info, - format_settings, - local_context, - max_block_size, - file_iterator, - local_context->getSettingsRef().max_download_threads, - false); - - auto file_deleter = [=, this](const std::string & path) mutable - { - object_storage->removeObject(StoredObject(path)); - }; - return std::make_shared( - getName(), - processor_id, - info.source_header, - std::move(internal_source), - files_metadata, - queue_settings->after_processing, - file_deleter, - info.requested_virtual_columns, - local_context, - shutdown_called, - table_is_being_dropped, + getName(), processor_id, + file_iterator, configuration, object_storage, + info, format_settings, + *queue_settings, files_metadata, + local_context, max_block_size, shutdown_called, table_is_being_dropped, getQueueLog(object_storage, local_context, *queue_settings), - getStorageID(), - log, - queue_settings->max_processed_files_before_commit, - queue_settings->max_processed_rows_before_commit, - queue_settings->max_processed_bytes_before_commit, - queue_settings->max_processing_time_sec_before_commit, - commit_once_processed); + getStorageID(), log, commit_once_processed); } bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index b93e560d5b9..bf3c28c5429 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1780,6 +1780,7 @@ def test_commit_on_limit(started_cluster): if "test_999999.csv" in get_processed_files(): break time.sleep(1) + assert "test_999999.csv" in get_processed_files() assert 1 == int( From 389a86ec059461f20521e4dd2c7888e2b3b37623 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 2 Jul 2024 16:20:42 +0000 Subject: [PATCH 160/273] Sort error codes alphabetically 
--- src/Functions/bitShiftLeft.cpp | 2 +- src/Functions/bitShiftRight.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 99fd11114aa..8e39ed86461 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -5,9 +5,9 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; } namespace diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index bdc193c4be6..46cfcde8a33 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -6,9 +6,9 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; } namespace From 0ed34661243e918a81d8823dbec8917ef88ab3b2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 15:30:16 +0000 Subject: [PATCH 161/273] Cleanup FunctionArgumentDescriptor const char * can be nullptr, std::string_view can't. 
--- src/Functions/FunctionHelpers.cpp | 33 ++++--- src/Functions/FunctionHelpers.h | 85 ++++++++----------- src/Functions/FunctionStringReplace.h | 2 +- src/Functions/FunctionTokens.h | 4 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsConversion.cpp | 6 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/JSONArrayLength.cpp | 2 +- src/Functions/URL/URLHierarchy.cpp | 2 +- src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- src/Functions/URL/extractURLParameters.cpp | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 2 +- src/Functions/array/arrayRandomSample.cpp | 2 +- src/Functions/array/arrayShingles.cpp | 2 +- src/Functions/arrayStringConcat.cpp | 2 +- src/Functions/castOrDefault.cpp | 6 +- src/Functions/countMatches.h | 2 +- src/Functions/dateTimeToSnowflakeID.cpp | 4 +- src/Functions/extractAll.cpp | 2 +- src/Functions/extractAllGroups.h | 2 +- src/Functions/extractGroups.cpp | 2 +- src/Functions/formatQuery.cpp | 2 +- src/Functions/fromDaysSinceYearZero.cpp | 2 +- src/Functions/generateSnowflakeID.cpp | 2 +- src/Functions/generateUUIDv4.cpp | 4 +- src/Functions/generateUUIDv7.cpp | 2 +- src/Functions/makeDate.cpp | 14 +-- src/Functions/parseDateTime.cpp | 2 +- src/Functions/parseReadableSize.cpp | 2 +- src/Functions/regexpExtract.cpp | 2 +- src/Functions/repeat.cpp | 2 +- src/Functions/seriesDecomposeSTL.cpp | 2 +- src/Functions/seriesOutliersDetectTukey.cpp | 2 +- src/Functions/snowflake.cpp | 8 +- src/Functions/snowflakeIDToDateTime.cpp | 4 +- src/Functions/space.cpp | 2 +- src/Functions/timestamp.cpp | 2 +- src/Functions/toDecimalString.cpp | 2 +- 40 files changed, 108 insertions(+), 122 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 593646240ca..0027f9f281f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,7 +97,7 @@ ColumnsWithTypeAndName 
createBlockWithNestedColumns(const ColumnsWithTypeAndName void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description) + const char * type_name) { if (arguments.size() <= argument_index) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", @@ -106,7 +106,7 @@ void validateArgumentType(const IFunction & func, const DataTypes & arguments, const auto & argument = arguments[argument_index]; if (!validator_func(*argument)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description); + argument->getName(), argument_index, func.getName(), type_name); } namespace @@ -120,9 +120,7 @@ void validateArgumentsImpl(const IFunction & func, { const auto argument_index = i + argument_offset; if (argument_index >= arguments.size()) - { break; - } const auto & arg = arguments[i + argument_offset]; const auto & descriptor = descriptors[i]; @@ -130,10 +128,10 @@ void validateArgumentsImpl(const IFunction & func, throw Exception(error_code, "Illegal type of argument #{}{} of function {}{}{}", argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{}), + " '" + String(descriptor.name) + "'", func.getName(), - (descriptor.expected_type_description ? String(", expected ") + descriptor.expected_type_description : String{}), - (arg.type ? ", got " + arg.type->getName() : String{})); + String(", expected ") + String(descriptor.type_name), + arg.type ? 
", got " + arg.type->getName() : String{}); } } @@ -141,19 +139,22 @@ void validateArgumentsImpl(const IFunction & func, int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const ColumnPtr & column) const { - if (type_validator_func && (data_type == nullptr || !type_validator_func(*data_type))) + if (name.empty() || type_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "name or type_name are not set"); + + if (type_validator && (data_type == nullptr || !type_validator(*data_type))) return ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT; - if (column_validator_func && (column == nullptr || !column_validator_func(*column))) + if (column_validator && (column == nullptr || !column_validator(*column))) return ErrorCodes::ILLEGAL_COLUMN; return 0; } -void validateFunctionArgumentTypes(const IFunction & func, - const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args) +void validateFunctionArguments(const IFunction & func, + const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args) { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { @@ -165,10 +166,8 @@ void validateFunctionArgumentTypes(const IFunction & func, using A = std::decay_t; if constexpr (std::is_same_v) { - if (a.argument_name) - result += "'" + std::string(a.argument_name) + "' : "; - if (a.expected_type_description) - result += a.expected_type_description; + result += "'" + String(a.name) + "' : "; + result += a.type_name; } else if constexpr (std::is_same_v) result += a.type->getName(); diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 6267d8eacc4..c08eb5265c1 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -119,73 +119,60 @@ ColumnsWithTypeAndName 
createBlockWithNestedColumns(const ColumnsWithTypeAndName /// throws if there is no argument at specified index or if predicate returns false. void validateArgumentType(const IFunction & func, const DataTypes & arguments, size_t argument_index, bool (* validator_func)(const IDataType &), - const char * expected_type_description); + const char * type_name); -/** Simple validator that is used in conjunction with validateFunctionArgumentTypes() to check if function arguments are as expected - * - * Also it is used to generate function description when arguments do not match expected ones. - * Any field can be null: - * `argument_name` - if not null, reported via type check errors. - * `expected_type_description` - if not null, reported via type check errors. - * `type_validator_func` - if not null, used to validate data type of function argument. - * `column_validator_func` - if not null, used to validate column of function argument. - */ +/// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided +/// arguments match the expected arguments. struct FunctionArgumentDescriptor { - const char * argument_name; + /// The argument name, e.g. "longitude". + /// Should not be empty. + std::string_view name; + /// A function which validates the argument data type. + /// May be nullptr. using TypeValidator = bool (*)(const IDataType &); - TypeValidator type_validator_func; + TypeValidator type_validator; + + /// A function which validates the argument column. + /// May be nullptr. using ColumnValidator = bool (*)(const IColumn &); - ColumnValidator column_validator_func; + ColumnValidator column_validator; - const char * expected_type_description; + /// The expected argument type, e.g. "const String" or "UInt64". + /// Should not be empty. + std::string_view type_name; - /** Validate argument type and column. 
- * - * Returns non-zero error code if: - * Validator != nullptr && (Value == nullptr || Validator(*Value) == false) - * For: - * Validator is either `type_validator_func` or `column_validator_func` - * Value is either `data_type` or `column` respectively. - * ILLEGAL_TYPE_OF_ARGUMENT if type validation fails - * - */ + /// Validate argument type and column. int isValid(const DataTypePtr & data_type, const ColumnPtr & column) const; }; using FunctionArgumentDescriptors = std::vector; -/** Validate that function arguments match specification. - * - * Designed to simplify argument validation for functions with variable arguments - * (e.g. depending on result type or other trait). - * First, checks that number of arguments is as expected (including optional arguments). - * Second, checks that mandatory args present and have valid type. - * Third, checks optional arguments types, skipping ones that are missing. - * - * Please note that if you have several optional arguments, like f([a, b, c]), - * only these calls are considered valid: - * f(a) - * f(a, b) - * f(a, b, c) - * - * But NOT these: f(a, c), f(b, c) - * In other words you can't omit middle optional arguments (just like in regular C++). - * - * If any mandatory arg is missing, throw an exception, with explicit description of expected arguments. - */ -void validateFunctionArgumentTypes(const IFunction & func, const ColumnsWithTypeAndName & arguments, - const FunctionArgumentDescriptors & mandatory_args, - const FunctionArgumentDescriptors & optional_args = {}); +/// Validates that the user-provided arguments match the expected arguments. +/// +/// Checks that +/// - the number of provided arguments matches the number of mandatory/optional arguments, +/// - all mandatory arguments are present and have the right type, +/// - optional arguments - if present - have the right type. +/// +/// With multiple optional arguments, e.g. f([a, b, c]), provided arguments must match left-to-right. E.g. 
these calls are considered valid: +/// f(a) +/// f(a, b) +/// f(a, b, c) +/// but these are NOT: +/// f(a, c) +/// f(b, c) +void validateFunctionArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments, + const FunctionArgumentDescriptors & mandatory_args, + const FunctionArgumentDescriptors & optional_args = {}); /// Checks if a list of array columns have equal offsets. Return a pair of nested columns and offsets if true, otherwise throw. std::pair, const ColumnArray::Offset *> checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments); -/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns. - * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. - */ +/// Return ColumnNullable of src, with null map as OR-ed null maps of args columns. +/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count); struct NullPresence diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index aee04a5969a..b4bcfa514a8 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -40,7 +40,7 @@ public: {"replacement", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index d6cf6a24983..0ca47126198 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -194,7 +194,7 @@ static inline void checkArgumentsWithSeparatorAndOptionalMaxSubstrings( {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, 
arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & func, const ColumnsWithTypeAndName & arguments) @@ -207,7 +207,7 @@ static inline void checkArgumentsWithOptionalMaxSubstrings(const IFunction & fun {"max_substrings", static_cast(&isNativeInteger), isColumnConst, "const Number"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args); + validateFunctionArguments(func, arguments, mandatory_args, optional_args); } } diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index c418163343b..e282bcfbfe2 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -47,7 +47,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 524b4f82acd..7af6265eba9 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -165,7 +165,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "encryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "plaintext"}, @@ -438,7 +438,7 @@ private: }); } - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{ {"mode", static_cast(&isStringOrFixedString), isColumnConst, "decryption mode string"}, {"input", static_cast(&isStringOrFixedString), {}, "ciphertext"}, diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 
2a0b2f1d075..f3e54d2fbd9 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -2020,7 +2020,7 @@ public: DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if constexpr (to_decimal) @@ -2049,7 +2049,7 @@ public: optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); } - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if constexpr (std::is_same_v) { @@ -2390,7 +2390,7 @@ public: if (isDateTime64(arguments)) { - validateFunctionArgumentTypes(*this, arguments, + validateFunctionArguments(*this, arguments, FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, // optional FunctionArgumentDescriptors{ diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 08e257de8ac..7a907e56a7d 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -647,7 +647,7 @@ public: FunctionArgumentDescriptors optional_args{ {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return arguments[0].type; } diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 84e87061398..73dd55f1266 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -48,7 +48,7 @@ namespace {"json", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + 
validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index c08f41f06ee..0f565df8172 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -32,7 +32,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index 7c796116b8d..2cb5995e375 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index 16ace36d39b..b3d51d02162 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -30,7 +30,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 43079834872..ce2aadaeede 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -31,7 +31,7 @@ public: {"URL", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, 
arguments, mandatory_args); } void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 87f3390ac73..7db20667888 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -87,7 +87,7 @@ public: {"array_1", static_cast(&isArray), nullptr, "Array"}, {"array_2", static_cast(&isArray), nullptr, "Array"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared>(); } diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp index b08a73b93f3..6e176b6e33d 100644 --- a/src/Functions/array/arrayRandomSample.cpp +++ b/src/Functions/array/arrayRandomSample.cpp @@ -39,7 +39,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"samples", static_cast(&isUInt), isColumnConst, "const UInt*"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); // Return an array with the same nested type as the input array const DataTypePtr & array_type = arguments[0].type; diff --git a/src/Functions/array/arrayShingles.cpp b/src/Functions/array/arrayShingles.cpp index 8932482c69c..7c97d8136fb 100644 --- a/src/Functions/array/arrayShingles.cpp +++ b/src/Functions/array/arrayShingles.cpp @@ -31,7 +31,7 @@ public: {"array", static_cast(&isArray), nullptr, "Array"}, {"length", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].type.get()); return std::make_shared(std::make_shared(array_type->getNestedType())); diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index 421408c01f2..12bab410fec 100644 
--- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -159,7 +159,7 @@ public: {"separator", static_cast(&isString), isColumnConst, "const String"}, }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 44b39811882..995b5fa91e7 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -203,7 +203,7 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, "any type"}}; FunctionArgumentDescriptors optional_args; if (isDecimal(type) || isDateTime64(type)) @@ -212,9 +212,9 @@ private: if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); - optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); + optional_args.push_back({"default_value", nullptr, nullptr, "any type"}); - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); size_t additional_argument_index = 1; diff --git a/src/Functions/countMatches.h b/src/Functions/countMatches.h index fbbb9d017ee..5f07b936e26 100644 --- a/src/Functions/countMatches.h +++ b/src/Functions/countMatches.h @@ -38,7 +38,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}, {"pattern", static_cast(&isString), isColumnConst, "constant String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git 
a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index 968a7628ca5..c48f8c13152 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -43,7 +43,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } @@ -91,7 +91,7 @@ public: FunctionArgumentDescriptors optional_args{ {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); return std::make_shared(); } diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index 5801a7b8f4f..4a3eb32474c 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -59,7 +59,7 @@ public: {"pattern", static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(func, arguments, mandatory_args); + validateFunctionArguments(func, arguments, mandatory_args); } static constexpr auto strings_argument_position = 0uz; diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index dfcd0e31715..7732855b211 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -74,7 +74,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); /// Two-dimensional array of strings, each `row` of top array represents matching groups. 
return std::make_shared(std::make_shared(std::make_shared())); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index f62352af0bd..ac6266a2e82 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -48,7 +48,7 @@ public: {"haystack", static_cast(&isStringOrFixedString), nullptr, "const String or const FixedString"}, {"needle", static_cast(&isStringOrFixedString), isColumnConst, "const String or const FixedString"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 3b632147864..655ea2e7cde 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -54,7 +54,7 @@ public: FunctionArgumentDescriptors args{ {"query", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr string_type = std::make_shared(); if (error_handling == ErrorHandling::Null) diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index b98c587d172..0543e6bf229 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -54,7 +54,7 @@ public: { FunctionArgumentDescriptors args{{"days", static_cast(&isNativeInteger), nullptr, "Integer"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 8ac010deafc..a171b6bf86e 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,7 +167,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - 
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index b0fec43fe94..a928f9009c8 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -30,9 +30,9 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "any type"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..a9ed08d9f83 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -163,7 +163,7 @@ public: FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 3d8b8617472..41a09793994 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -87,7 +87,7 @@ public: {mandatory_argument_names_year_month_day[1], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_month_day[2], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } else { @@ -95,7 +95,7 @@ public: {mandatory_argument_names_year_dayofyear[0], static_cast(&isNumber), nullptr, "Number"}, {mandatory_argument_names_year_dayofyear[1], static_cast(&isNumber), nullptr, "Number"} }; 
- validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); } return std::make_shared(); @@ -193,7 +193,7 @@ public: {mandatory_argument_names[0], static_cast(&isNumber), nullptr, "Number"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -357,7 +357,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -440,7 +440,7 @@ public: {optional_argument_names[2], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); if (arguments.size() >= mandatory_argument_names.size() + 1) { @@ -572,7 +572,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional timezone argument std::string timezone; @@ -652,7 +652,7 @@ public: {optional_argument_names[0], static_cast(&isString), isColumnConst, "const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); /// Optional precision argument auto precision = DEFAULT_PRECISION; diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 162b8c58873..339eb4cb26c 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -589,7 +589,7 @@ namespace {"timezone", static_cast(&isString), &isColumnConst, 
"const String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr date_type = std::make_shared(time_zone_name); diff --git a/src/Functions/parseReadableSize.cpp b/src/Functions/parseReadableSize.cpp index f5c2c53439b..1abcf7f164f 100644 --- a/src/Functions/parseReadableSize.cpp +++ b/src/Functions/parseReadableSize.cpp @@ -68,7 +68,7 @@ public: { {"readable_size", static_cast(&isString), nullptr, "String"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); DataTypePtr return_type = std::make_shared(); if constexpr (error_handling == ErrorHandling::Null) return std::make_shared(return_type); diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index cfb42580cb0..3cc5393296c 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -54,7 +54,7 @@ public: if (arguments.size() == 3) args.emplace_back(FunctionArgumentDescriptor{"index", static_cast(&isInteger), nullptr, "Integer"}); - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 7f2fe646062..aa90bf2490d 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -201,7 +201,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 618808b64ed..720aa1e0799 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -45,7 +45,7 @@ public: {"time_series", static_cast(&isArray), 
nullptr, "Array"}, {"period", static_cast(&isNativeUInt), nullptr, "Unsigned Integer"}, }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared(std::make_shared())); } diff --git a/src/Functions/seriesOutliersDetectTukey.cpp b/src/Functions/seriesOutliersDetectTukey.cpp index 81fc904e16e..4063d0ab85b 100644 --- a/src/Functions/seriesOutliersDetectTukey.cpp +++ b/src/Functions/seriesOutliersDetectTukey.cpp @@ -51,7 +51,7 @@ public: {"max_percentile", static_cast(&isFloat), isColumnConst, "Number"}, {"k", static_cast(&isNativeNumber), isColumnConst, "Number"}}; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 5ff8a636058..31ea6a28ece 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -64,7 +64,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -121,7 +121,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) @@ -190,7 +190,7 @@ public: FunctionArgumentDescriptors args{ {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } @@ -255,7 +255,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time_zone", 
static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); String timezone; if (arguments.size() == 2) diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index b799792a56f..9a1d5b8a74b 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -56,7 +56,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) @@ -127,7 +127,7 @@ public: {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args, optional_args); + validateFunctionArguments(*this, arguments, args, optional_args); String timezone; if (arguments.size() == 3) diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 83183c991bc..ce12f2f541c 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -48,7 +48,7 @@ public: {"n", static_cast(&isInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index fbca08b0968..6f2bd2030d5 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -46,7 +46,7 @@ public: FunctionArgumentDescriptors optional_args{ {"time", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); 
return std::make_shared(DATETIME_SCALE); } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index fc621b272de..4ee664ad237 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -43,7 +43,7 @@ public: {"precision", static_cast(&isNativeInteger), &isColumnConst, "const Integer"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); + validateFunctionArguments(*this, arguments, mandatory_args, {}); return std::make_shared(); } From 1821638d5e3e2ee8fbea278c67e6d757f03c4253 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 16:38:00 +0000 Subject: [PATCH 162/273] Replace validateArgumentType() by validateFunctionArguments() --- src/Functions/FunctionHelpers.cpp | 14 -------------- src/Functions/FunctionHelpers.h | 6 ------ src/Functions/geohashDecode.cpp | 7 +++++-- src/Functions/geohashEncode.cpp | 22 +++++++++------------- src/Functions/geohashesInBox.cpp | 25 ++++++++++++++----------- 5 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 0027f9f281f..236afc5ecbf 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -95,20 +95,6 @@ ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName return res; } -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name) -{ - if (arguments.size() <= argument_index) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of arguments of function {}", - func.getName()); - - const auto & argument = arguments[argument_index]; - if (!validator_func(*argument)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}", - argument->getName(), argument_index, func.getName(), type_name); -} - 
namespace { void validateArgumentsImpl(const IFunction & func, diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index c08eb5265c1..4f93b236bcb 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -115,12 +115,6 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// column if it is nullable. ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns); -/// Checks argument type at specified index with predicate. -/// throws if there is no argument at specified index or if predicate returns false. -void validateArgumentType(const IFunction & func, const DataTypes & arguments, - size_t argument_index, bool (* validator_func)(const IDataType &), - const char * type_name); - /// Expected arguments for a function. Can be used in conjunction with validateFunctionArguments() to check that the user-provided /// arguments match the expected arguments. struct FunctionArgumentDescriptor diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b2454f5dffc..96ad7dacfc4 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -38,9 +38,12 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + FunctionArgumentDescriptors args{ + {"encoded", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArguments(*this, arguments, args); return std::make_shared( DataTypes{std::make_shared(), std::make_shared()}, diff --git a/src/Functions/geohashEncode.cpp 
b/src/Functions/geohashEncode.cpp index 7c353b822aa..034c8188b63 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } namespace @@ -40,19 +39,16 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", - getName()); - } + FunctionArgumentDescriptors mandatory_args{ + {"longitude", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude", static_cast(&isFloat), nullptr, "Float*"} + }; + FunctionArgumentDescriptors optional_args{ + {"precision", static_cast(&isInteger), nullptr, "(U)Int*"} + }; + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index ac8d4a6ad8f..9429903dda7 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -35,22 +35,25 @@ public: size_t getNumberOfArguments() const override { return 5; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - 
validateArgumentType(*this, arguments, 1, isFloat, "float"); - validateArgumentType(*this, arguments, 2, isFloat, "float"); - validateArgumentType(*this, arguments, 3, isFloat, "float"); - validateArgumentType(*this, arguments, 4, isUInt8, "integer"); + FunctionArgumentDescriptors args{ + {"longitute_min", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_min", static_cast(&isFloat), nullptr, "Float*"}, + {"longitute_max", static_cast(&isFloat), nullptr, "Float*"}, + {"latitude_max", static_cast(&isFloat), nullptr, "Float*"}, + {"precision", static_cast(&isUInt8), nullptr, "UInt8"} + }; + validateFunctionArguments(*this, arguments, args); - if (!(arguments[0]->equals(*arguments[1]) && - arguments[0]->equals(*arguments[2]) && - arguments[0]->equals(*arguments[3]))) + if (!(arguments[0].type->equals(*arguments[1].type) && + arguments[0].type->equals(*arguments[2].type) && + arguments[0].type->equals(*arguments[3].type))) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of {} all coordinate arguments must have the same type, " - "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), - arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); + "instead they are:{}, {}, {}, {}.", getName(), arguments[0].type->getName(), + arguments[1].type->getName(), arguments[2].type->getName(), arguments[3].type->getName()); } return std::make_shared(std::make_shared()); From 659020dc8695e974241723f1fa49fc66bcc1c478 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 17:42:33 +0000 Subject: [PATCH 163/273] More aesthetic error messages --- src/Functions/FunctionHelpers.cpp | 60 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 236afc5ecbf..b30f38d3d76 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -97,6 +97,19 @@ 
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName namespace { + +String withOrdinalEnding(size_t i) +{ + switch (i) + { + case 0: return "1st"; + case 1: return "2nd"; + case 2: return "3rd"; + default: return std::to_string(i) + "th"; + } + +} + void validateArgumentsImpl(const IFunction & func, const ColumnsWithTypeAndName & arguments, size_t argument_offset, @@ -112,12 +125,12 @@ void validateArgumentsImpl(const IFunction & func, const auto & descriptor = descriptors[i]; if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0) throw Exception(error_code, - "Illegal type of argument #{}{} of function {}{}{}", - argument_offset + i + 1, // +1 is for human-friendly 1-based indexing - " '" + String(descriptor.name) + "'", + "A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}", + withOrdinalEnding(argument_offset + i), + descriptor.name, func.getName(), - String(", expected ") + String(descriptor.type_name), - arg.type ? ", got " + arg.type->getName() : String{}); + descriptor.type_name, + arg.type ? arg.type->getName() : ""); } } @@ -144,34 +157,19 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto join_argument_types = [](const auto & args, const String sep = ", ") - { - String result; - for (const auto & a : args) - { - using A = std::decay_t; - if constexpr (std::is_same_v) - { - result += "'" + String(a.name) + "' : "; - result += a.type_name; - } - else if constexpr (std::is_same_v) - result += a.type->getName(); - - result += sep; - } - - if (!args.empty()) - result.erase(result.end() - sep.length(), result.end()); - - return result; - }; + auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? 
"s" : ""); }; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Incorrect number of arguments for function {} provided {}{}, expected {}{} ({}{})", - func.getName(), arguments.size(), (!arguments.empty() ? " (" + join_argument_types(arguments) + ")" : String{}), - mandatory_args.size(), (!optional_args.empty() ? " to " + std::to_string(mandatory_args.size() + optional_args.size()) : ""), - join_argument_types(mandatory_args), (!optional_args.empty() ? ", [" + join_argument_types(optional_args) + "]" : "")); + "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", + func.getName(), + (!mandatory_args.empty() && !optional_args.empty()) + ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) + : (!mandatory_args.empty() && optional_args.empty()) + ? fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" + : (mandatory_args.empty() && !optional_args.empty()) + ? 
fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) + : "0 arguments", + fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } validateArgumentsImpl(func, arguments, 0, mandatory_args); From d2cade4aa38be9f33715d593cb2e0d549c9f565e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 2 Jul 2024 20:11:06 +0200 Subject: [PATCH 164/273] Relax the check in 02982_aggregation_states_destruction --- .../queries/0_stateless/02982_aggregation_states_destruction.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" From 073471530b1e6bc8f08b959b4071cd0a376f24e1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 2 Jul 2024 20:30:46 +0200 Subject: [PATCH 165/273] fix test --- tests/queries/0_stateless/01158_zookeeper_log_long.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.sql b/tests/queries/0_stateless/01158_zookeeper_log_long.sql index 55d4162fc48..804cdf48fb6 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.sql +++ 
b/tests/queries/0_stateless/01158_zookeeper_log_long.sql @@ -29,14 +29,20 @@ select 'parts'; select type, has_watch, op_num, replace(path, toString(serverUUID()), ''), is_ephemeral, is_sequential, if(startsWith(path, '/clickhouse/sessions'), 1, version), requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0') +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path='/test/01158/' || currentDatabase() || '/rmt/replicas/1/parts/all_0_0_0' + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; select 'blocks'; select type, has_watch, op_num, path, is_ephemeral, is_sequential, version, requests_size, request_idx, error, watch_type, watch_state, path_created, stat_version, stat_cversion, stat_dataLength, stat_numChildren from system.zookeeper_log -where (session_id, xid) in (select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500)) +where (session_id, xid) in ( + select session_id, xid from system.zookeeper_log where path like '/test/01158/' || currentDatabase() || '/rmt/blocks/%' and op_num not in (1, 12, 500) + and (query_id='' or query_id in (select query_id from system.query_log where current_database=currentDatabase() and event_date>=yesterday())) +) order by xid, type, request_idx; drop table rmt sync; From 0afccecd6b4048414d672918f5351fe80abd4548 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:05:23 +0000 Subject: [PATCH 166/273] Fix build --- src/Functions/FunctionBase64Conversion.h | 2 +- 
src/Functions/seriesPeriodDetectFFT.cpp | 2 +- src/Functions/sqid.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 083179c3ca8..363b9ee3a31 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -202,7 +202,7 @@ public: {"value", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} }; - validateFunctionArgumentTypes(*this, arguments, mandatory_arguments); + validateFunctionArguments(*this, arguments, mandatory_arguments); return std::make_shared(); } diff --git a/src/Functions/seriesPeriodDetectFFT.cpp b/src/Functions/seriesPeriodDetectFFT.cpp index e85b3a97c67..471354235d5 100644 --- a/src/Functions/seriesPeriodDetectFFT.cpp +++ b/src/Functions/seriesPeriodDetectFFT.cpp @@ -53,7 +53,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{{"time_series", static_cast(&isArray), nullptr, "Array"}}; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 6679646fef4..0e133590b84 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -100,7 +100,7 @@ public: FunctionArgumentDescriptors args{ {"sqid", static_cast(&isString), nullptr, "String"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); return std::make_shared(std::make_shared()); } From 54c4f02dca9fc0d33d717c7fb4122834ac214ae9 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 2 Jul 2024 15:22:48 -0700 Subject: [PATCH 167/273] [Docs] Better wording for behavior of MATERIALIZED expr --- docs/en/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 0253bc647e6..b866d0b9f5f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -152,7 +152,7 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries. +Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s. Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. From bcf8a93a52204cb80a867c237f302221bb51c272 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 01:34:25 -0400 Subject: [PATCH 168/273] `max_query_length` argument for the fuzzQuery --- .../table-functions/fuzzQuery.md | 3 ++- src/Storages/StorageFuzzQuery.cpp | 20 +++++++++++++------ src/Storages/StorageFuzzQuery.h | 1 + .../03031_table_function_fuzzquery.sql | 4 ++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md index ff8cfd1cd3b..e15f8a40156 100644 --- a/docs/en/sql-reference/table-functions/fuzzQuery.md +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -9,12 +9,13 @@ sidebar_label: fuzzQuery Perturbs the given query string with random variations. ``` sql -fuzzQuery(query[, random_seed]) +fuzzQuery(query[, max_query_length[, random_seed]]) ``` **Arguments** - `query` (String) - The source query to perform the fuzzing on. 
+- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. - `random_seed` (UInt64) - A random seed for producing stable results. **Returned Value** diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 5e29a04427b..229ae1af7c1 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -47,7 +47,7 @@ ColumnPtr FuzzQuerySource::createColumn() size_t data_len = data.size(); /// AST is too long, will start from the original query. - if (data_len > 500) + if (config.max_query_length > 500) { fuzz_base = query; continue; @@ -120,10 +120,11 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine // Supported signatures: // - // FuzzQuery('query') - // FuzzQuery('query', 'random_seed') - if (engine_args.empty() || engine_args.size() > 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 2 arguments: query, random_seed"); + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); @@ -131,9 +132,16 @@ StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); configuration.query = std::move(first_arg); - if (engine_args.size() == 2) + if (engine_args.size() >= 2) { const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); if (!literal.value.isNull()) 
configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h index 3ae506fdfb8..125ef960e74 100644 --- a/src/Storages/StorageFuzzQuery.h +++ b/src/Storages/StorageFuzzQuery.h @@ -18,6 +18,7 @@ public: struct Configuration : public StatelessTableEngineConfiguration { String query; + UInt64 max_query_length = 500; UInt64 random_seed = randomSeed(); }; diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql index 5821e2e5111..b26096f7f0e 100644 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -1,5 +1,5 @@ -SELECT * FROM fuzzQuery('SELECT 1', 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; SELECT * FROM fuzzQuery('SELECT * FROM ( @@ -15,4 +15,4 @@ FROM ( ) AS r ON l.item_id = r.item_id ORDER BY 1,2,3; -', 8956) LIMIT 10 FORMAT NULL; +', 500, 8956) LIMIT 10 FORMAT NULL; From 6079373ce3ef1107bc7ea634c6d1e1ceac24744d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 08:36:16 +0000 Subject: [PATCH 169/273] Incorporate review feedback --- src/Functions/FunctionHelpers.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index b30f38d3d76..c658063b66f 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -157,18 +157,22 @@ void validateFunctionArguments(const IFunction & func, { if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size()) { - auto argument_singular_or_plural = [](const auto & args){ return fmt::format("argument{}", args.size() != 1 ? 
"s" : ""); }; + auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; }; + + String expected_args_string; + if (!mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)); + else if (!mandatory_args.empty() && optional_args.empty()) + expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments" + else if (mandatory_args.empty() && !optional_args.empty()) + expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)); + else + expected_args_string = "0 arguments"; throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "An incorrect number of arguments was specified for function '{}'. Expected {}, got {}", func.getName(), - (!mandatory_args.empty() && !optional_args.empty()) - ? fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args)) - : (!mandatory_args.empty() && optional_args.empty()) - ? fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)) /// intentionally not "_mandatory_ arguments" - : (mandatory_args.empty() && !optional_args.empty()) - ? 
fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args)) - : "0 arguments", + expected_args_string, fmt::format("{} {}", arguments.size(), argument_singular_or_plural(arguments))); } From 1f309ef342360ba2207a2ed1e7eb87c0eaa9cfde Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:03:32 +0200 Subject: [PATCH 170/273] Bump From f2ffd727f002702ab29dff7c2d18ceaaf8e09e6e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 11:04:04 +0200 Subject: [PATCH 171/273] Bump From c86cdbb243c9093e3eb59134de08beb42f1d4c02 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 11:05:34 +0200 Subject: [PATCH 172/273] Remove scary jemalloc log --- programs/server/Server.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..f992fdc13a9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -133,10 +133,6 @@ # include #endif -#if USE_JEMALLOC -# include -#endif - #if USE_AZURE_BLOB_STORAGE # include # include @@ -176,34 +172,10 @@ namespace ProfileEvents namespace fs = std::filesystem; -#if USE_JEMALLOC -static bool jemallocOptionEnabled(const char *name) -{ - bool value; - size_t size = sizeof(value); - - if (mallctl(name, reinterpret_cast(&value), &size, /* newp= */ nullptr, /* newlen= */ 0)) - throw Poco::SystemException("mallctl() failed"); - - return value; -} -#else -static bool jemallocOptionEnabled(const char *) { return false; } -#endif - int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; - if (jemallocOptionEnabled("opt.background_thread")) - { - LOG_ERROR(&app.logger(), - "jemalloc.background_thread was requested, " - "however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, " - "and also background_thread is not compatible with ClickHouse watchdog " - "(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)"); - } 
- /// Do not fork separate process from watchdog if we attached to terminal. /// Otherwise it breaks gdb usage. /// Can be overridden by environment variable (cannot use server config at this moment). From cfafbc388cb1ac3ca7c5c6810f3f5c00f3b8b3d5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 3 Jul 2024 11:56:07 +0200 Subject: [PATCH 173/273] Fix test_drop_table --- .../ObjectStorageQueue/ObjectStorageQueueSource.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 683a7038bb6..dc5fb6d2744 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -422,8 +422,14 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - while (!shutdown_called) + while (true) { + if (shutdown_called) + { + LOG_TRACE(log, "Shutdown was called, stopping sync"); + break; + } + if (!reader) { const auto context = getContext(); From 198b80b6a252ef25f8b4f269a53c39ec5ae0e76f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 16:36:43 +0000 Subject: [PATCH 174/273] Cosmetics No code was harmed in the process (really just cosmetics). 
--- src/Storages/Statistics/Statistics.cpp | 40 +++++++++---------- src/Storages/Statistics/Statistics.h | 28 +++++-------- src/Storages/Statistics/TDigestStatistics.cpp | 34 ++++++++-------- src/Storages/Statistics/TDigestStatistics.h | 8 +--- src/Storages/Statistics/UniqStatistics.cpp | 2 +- src/Storages/Statistics/UniqStatistics.h | 3 +- 6 files changed, 48 insertions(+), 67 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fed0bd61c03..a4c57c9eef4 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include #include @@ -10,6 +7,8 @@ #include #include #include +#include + namespace DB { @@ -20,7 +19,6 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -/// Version / bitmask of statistics / data of statistics / enum StatisticsFileVersion : UInt16 { V0 = 0, @@ -29,17 +27,15 @@ enum StatisticsFileVersion : UInt16 IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_), rows(0) + : stats_desc(stats_desc_) { } void ColumnStatistics::update(const ColumnPtr & column) { rows += column->size(); - for (const auto & iter : stats) - { - iter.second->update(column); - } + for (const auto & stat : stats) + stat.second->update(column); } Float64 ColumnStatistics::estimateLess(Float64 val) const @@ -76,14 +72,17 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; for (const auto & [type, _]: stats) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// We write some basic statistics + + /// store the column row count as it is always useful writeIntBinary(rows, buf); - /// We write complex statistics - for (const auto & [type, stat_ptr]: 
stats) + + /// write the actual statistics object + for (const auto & [type, stat_ptr] : stats) stat_ptr->serialize(buf); } @@ -96,7 +95,9 @@ void ColumnStatistics::deserialize(ReadBuffer &buf) UInt64 stat_types_mask = 0; readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) { if (!(stat_types_mask & 1 << UInt8(it->first))) @@ -136,15 +137,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va { if (!validators.emplace(stats_type, std::move(validator)).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); - } MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(StatisticsType::TDigest, TDigestCreator); - registerCreator(StatisticsType::Uniq, UniqCreator); registerValidator(StatisticsType::TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::Uniq, UniqValidator); + registerCreator(StatisticsType::Uniq, UniqCreator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -159,9 +160,7 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st { auto it = validators.find(type); if (it == validators.end()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistic type '{}'", type); it->second(desc, data_type); } } @@ -173,10 +172,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); - } + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. 
Available types: 'tdigest' 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 2ab1337af02..5e756e48d42 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,19 +1,15 @@ #pragma once -#include -#include - #include -#include #include #include #include +#include namespace DB { -/// this is for user-defined statistic. constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; @@ -25,11 +21,9 @@ class IStatistics { public: explicit IStatistics(const SingleStatisticsDescription & stat_); - virtual ~IStatistics() = default; virtual void serialize(WriteBuffer & buf) = 0; - virtual void deserialize(ReadBuffer & buf) = 0; virtual void update(const ColumnPtr & column) = 0; @@ -43,11 +37,12 @@ using StatisticsPtr = std::shared_ptr; class ColumnStatistics { public: - explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_); + void serialize(WriteBuffer & buf); void deserialize(ReadBuffer & buf); - String getFileName() const; + String getFileName() const; const String & columnName() const; UInt64 rowCount() const; @@ -55,17 +50,14 @@ public: void update(const ColumnPtr & column); Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; private: - friend class MergeTreeStatisticsFactory; ColumnStatisticsDescription stats_desc; std::map stats; - UInt64 rows; /// the number of rows of the column + UInt64 rows = 0; /// the number of rows in the column }; class ColumnsDescription; @@ -79,25 +71,23 @@ public: void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; + using Validator = std::function; using Creator = std::function; - using Validator = std::function; 
- ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticsType type, Creator creator); void registerValidator(StatisticsType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; using Validators = std::unordered_map; - Creators creators; + using Creators = std::unordered_map; Validators validators; + Creators creators; }; } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index aa5662c979d..2f254b604e4 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -8,53 +8,53 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): - IStatistics(stat_) +TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) + : IStatistics(stat_) { } Float64 TDigestStatistics::estimateLess(Float64 val) const { - return data.getCountLessThan(val); + return t_digest.getCountLessThan(val); } Float64 TDigestStatistics::estimateEqual(Float64 val) const { - return data.getCountEqual(val); + return t_digest.getCountEqual(val); } void TDigestStatistics::serialize(WriteBuffer & buf) { - data.serialize(buf); + t_digest.serialize(buf); } void TDigestStatistics::deserialize(ReadBuffer & buf) { - data.deserialize(buf); + t_digest.deserialize(buf); } void TDigestStatistics::update(const ColumnPtr & column) { - size_t size = column->size(); + size_t rows = column->size(); - for (size_t i = 0; i < size; ++i) + for (size_t row = 0; row < rows; ++row) { /// TODO: support more types. 
- Float64 value = column->getFloat64(i); - data.add(value, 1); + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); } } +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); +} + StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) -{ - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); -} - } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 7c361b8751f..2e29becc5ee 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -6,27 +6,23 @@ namespace DB { - -/// TDigestStatistic is a kind of histogram. 
class TDigestStatistics : public IStatistics { public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; private: - QuantileTDigest data; + QuantileTDigest t_digest; }; -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index fc748e769ca..2f7a75db504 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -55,7 +55,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index 0d86a6e458a..bf097620a86 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -17,7 +17,6 @@ public: UInt64 getCardinality(); void serialize(WriteBuffer & buf) override; - void deserialize(ReadBuffer & buf) override; void update(const ColumnPtr & column) override; @@ -30,7 +29,7 @@ private: }; -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); void 
UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } From 337871e0ec0f8d1c6c89d5b0d39977ea689adc22 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jun 2024 18:02:58 +0000 Subject: [PATCH 175/273] Move some methods around Makes the order of methods within classes consistent. Did not touch the code itself. --- src/Storages/Statistics/Statistics.cpp | 5 +++- src/Storages/Statistics/Statistics.h | 4 +-- src/Storages/Statistics/TDigestStatistics.cpp | 28 +++++++++---------- src/Storages/Statistics/TDigestStatistics.h | 7 +++-- src/Storages/Statistics/UniqStatistics.cpp | 20 ++++++------- src/Storages/Statistics/UniqStatistics.h | 6 ++-- 6 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index a4c57c9eef4..5666f0bbf18 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -24,7 +24,10 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; -IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) + : stat(stat_) +{ +} ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) : stats_desc(stats_desc_) diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 5e756e48d42..4af7c423257 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -23,11 +23,11 @@ public: explicit IStatistics(const SingleStatisticsDescription & stat_); virtual ~IStatistics() = default; + virtual void update(const ColumnPtr & column) = 0; + virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; - virtual void update(const ColumnPtr & column) = 0; - protected: SingleStatisticsDescription stat; }; diff --git 
a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp index 2f254b604e4..0e2cc8bac6d 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -13,14 +13,16 @@ TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) { } -Float64 TDigestStatistics::estimateLess(Float64 val) const +void TDigestStatistics::update(const ColumnPtr & column) { - return t_digest.getCountLessThan(val); -} + size_t rows = column->size(); -Float64 TDigestStatistics::estimateEqual(Float64 val) const -{ - return t_digest.getCountEqual(val); + for (size_t row = 0; row < rows; ++row) + { + /// TODO: support more types. + Float64 value = column->getFloat64(row); + t_digest.add(value, 1); + } } void TDigestStatistics::serialize(WriteBuffer & buf) @@ -33,16 +35,14 @@ void TDigestStatistics::deserialize(ReadBuffer & buf) t_digest.deserialize(buf); } -void TDigestStatistics::update(const ColumnPtr & column) +Float64 TDigestStatistics::estimateLess(Float64 val) const { - size_t rows = column->size(); + return t_digest.getCountLessThan(val); +} - for (size_t row = 0; row < rows; ++row) - { - /// TODO: support more types. 
- Float64 value = column->getFloat64(row); - t_digest.add(value, 1); - } +Float64 TDigestStatistics::estimateEqual(Float64 val) const +{ + return t_digest.getCountEqual(val); } void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h index 2e29becc5ee..a9fbc0410f3 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -11,13 +11,14 @@ class TDigestStatistics : public IStatistics public: explicit TDigestStatistics(const SingleStatisticsDescription & stat_); - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + Float64 estimateLess(Float64 val) const; + Float64 estimateEqual(Float64 val) const; + private: QuantileTDigest t_digest; }; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp index 2f7a75db504..267654656cd 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -25,11 +25,13 @@ UniqStatistics::~UniqStatistics() collector->destroy(data); } -UInt64 UniqStatistics::getCardinality() +void UniqStatistics::update(const ColumnPtr & column) { - auto column = DataTypeUInt64().createColumn(); - collector->insertResultInto(data, *column, nullptr); - return column->getUInt(0); + /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. + /// Here we intend to avoid crash in CI. 
+ auto col_ptr = column->convertToFullColumnIfLowCardinality(); + const IColumn * raw_ptr = col_ptr.get(); + collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } void UniqStatistics::serialize(WriteBuffer & buf) @@ -42,13 +44,11 @@ void UniqStatistics::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -void UniqStatistics::update(const ColumnPtr & column) +UInt64 UniqStatistics::getCardinality() { - /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. - /// Here we intend to avoid crash in CI. - auto col_ptr = column->convertToFullColumnIfLowCardinality(); - const IColumn * raw_ptr = col_ptr.get(); - collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); } void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h index bf097620a86..4f28f80f9cb 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/UniqStatistics.h @@ -11,18 +11,16 @@ class UniqStatistics : public IStatistics { public: UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; - UInt64 getCardinality(); + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - void update(const ColumnPtr & column) override; + UInt64 getCardinality(); private: - std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; From 9f4e44bfc44a00dde015410d0c62e71b7cc000d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:02:29 +0000 Subject: [PATCH 176/273] Rename XStatistics to StatisticsX Makes the naming more consistent with the rest of 
the codebase, e.g. - MergeTreeIndexSet - MergeTreeIndexMinMax or - StorageJoin - StorageMergeTree etc. --- src/Storages/Statistics/Statistics.cpp | 12 ++++++------ ...igestStatistics.cpp => StatisticsTDigest.cpp} | 16 ++++++++-------- .../{TDigestStatistics.h => StatisticsTDigest.h} | 4 ++-- .../{UniqStatistics.cpp => StatisticsUniq.cpp} | 16 ++++++++-------- .../{UniqStatistics.h => StatisticsUniq.h} | 6 +++--- src/Storages/Statistics/tests/gtest_stats.cpp | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) rename src/Storages/Statistics/{TDigestStatistics.cpp => StatisticsTDigest.cpp} (67%) rename src/Storages/Statistics/{TDigestStatistics.h => StatisticsTDigest.h} (84%) rename src/Storages/Statistics/{UniqStatistics.cpp => StatisticsUniq.cpp} (79%) rename src/Storages/Statistics/{UniqStatistics.h => StatisticsUniq.h} (82%) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 5666f0bbf18..c454adccc06 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -44,7 +44,7 @@ void ColumnStatistics::update(const ColumnPtr & column) Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,12 +57,12 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); + auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); /// 2048 is the default number of buckets in TDigest. 
In this case, TDigest stores exactly one value (with many rows) /// for every bucket. - if (uniq_static->getCardinality() < 2048) + if (statistics_uniq->getCardinality() < 2048) { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); + auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); return tdigest_static->estimateEqual(val); } } diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp similarity index 67% rename from src/Storages/Statistics/TDigestStatistics.cpp rename to src/Storages/Statistics/StatisticsTDigest.cpp index 0e2cc8bac6d..0747197370c 100644 --- a/src/Storages/Statistics/TDigestStatistics.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,12 +8,12 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) : IStatistics(stat_) { } -void TDigestStatistics::update(const ColumnPtr & column) +void StatisticsTDigest::update(const ColumnPtr & column) { size_t rows = column->size(); @@ -25,22 +25,22 @@ void TDigestStatistics::update(const ColumnPtr & column) } } -void TDigestStatistics::serialize(WriteBuffer & buf) +void StatisticsTDigest::serialize(WriteBuffer & buf) { t_digest.serialize(buf); } -void TDigestStatistics::deserialize(ReadBuffer & buf) +void StatisticsTDigest::deserialize(ReadBuffer & buf) { t_digest.deserialize(buf); } -Float64 TDigestStatistics::estimateLess(Float64 val) const +Float64 StatisticsTDigest::estimateLess(Float64 val) const { return t_digest.getCountLessThan(val); } -Float64 TDigestStatistics::estimateEqual(Float64 val) const +Float64 StatisticsTDigest::estimateEqual(Float64 val) const { return t_digest.getCountEqual(val); } @@ -54,7 +54,7 @@ void TDigestValidator(const 
SingleStatisticsDescription &, DataTypePtr data_type StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { - return std::make_shared(stat); + return std::make_shared(stat); } } diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/StatisticsTDigest.h similarity index 84% rename from src/Storages/Statistics/TDigestStatistics.h rename to src/Storages/Statistics/StatisticsTDigest.h index a9fbc0410f3..f391d0b17e6 100644 --- a/src/Storages/Statistics/TDigestStatistics.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -6,10 +6,10 @@ namespace DB { -class TDigestStatistics : public IStatistics +class StatisticsTDigest : public IStatistics { public: - explicit TDigestStatistics(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/StatisticsUniq.cpp similarity index 79% rename from src/Storages/Statistics/UniqStatistics.cpp rename to src/Storages/Statistics/StatisticsUniq.cpp index 267654656cd..4e24e5f0e96 100644 --- a/src/Storages/Statistics/UniqStatistics.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) : IStatistics(stat_) { arena = std::make_unique(); @@ -20,12 +20,12 @@ UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const collector->create(data); } -UniqStatistics::~UniqStatistics() +StatisticsUniq::~StatisticsUniq() { collector->destroy(data); } -void UniqStatistics::update(const ColumnPtr & column) +void StatisticsUniq::update(const ColumnPtr & 
column) { /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. /// Here we intend to avoid crash in CI. @@ -34,17 +34,17 @@ void UniqStatistics::update(const ColumnPtr & column) collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); } -void UniqStatistics::serialize(WriteBuffer & buf) +void StatisticsUniq::serialize(WriteBuffer & buf) { collector->serialize(data, buf); } -void UniqStatistics::deserialize(ReadBuffer & buf) +void StatisticsUniq::deserialize(ReadBuffer & buf) { collector->deserialize(data, buf); } -UInt64 UniqStatistics::getCardinality() +UInt64 StatisticsUniq::getCardinality() { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); @@ -60,7 +60,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/StatisticsUniq.h similarity index 82% rename from src/Storages/Statistics/UniqStatistics.h rename to src/Storages/Statistics/StatisticsUniq.h index 4f28f80f9cb..1c521fa9984 100644 --- a/src/Storages/Statistics/UniqStatistics.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -7,11 +7,11 @@ namespace DB { -class UniqStatistics : public IStatistics +class StatisticsUniq : public IStatistics { public: - UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); - ~UniqStatistics() override; + StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + ~StatisticsUniq() override; void update(const ColumnPtr & column) override; diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index f94f310be56..c3c14632ba1 100644 --- 
a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { From 2cefa56f9b640f14b020660ec0296fe7bb6669a9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Jul 2024 19:59:52 +0000 Subject: [PATCH 177/273] Update docs --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 6 +++--- docs/en/sql-reference/statements/alter/statistics.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f0c4e1b0e34..3826e4e9c94 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `TDigest` - Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. + [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. - `Uniq` - - Estimate the number of distinct values of a column by HyperLogLog. + + [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. 
## Column-level Settings {#column-level-settings} diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 80024781f88..6880cef0e5c 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -28,6 +28,6 @@ There is an example adding two statistics types to two columns: ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` -:::note +:::note Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: From bd7e613b3b21589710e11b043577959ce340e2c5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:27:40 +0000 Subject: [PATCH 178/273] Minor cleanup of 02864_statistics_exception --- .../02864_statistics_exception.sql | 78 +++++++++---------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql index c531d39cd69..289ffee6600 100644 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -1,57 +1,55 @@ -DROP TABLE IF EXISTS t1; +-- Tests creating/dropping/materializing statistics produces the right exceptions. 
-CREATE TABLE t1 +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } SET allow_experimental_statistics = 1; -CREATE TABLE t1 +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64, - pk String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- Unknown statistics types are rejected +CREATE TABLE tab ( - a Float64 STATISTICS(tdigest, tdigest(10)), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } -CREATE TABLE t1 +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab ( - a Float64 STATISTICS(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE t1 +CREATE TABLE tab ( a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; + b String +) Engine = MergeTree() ORDER BY tuple(); -ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE 
tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- Statistics can be created only on integer columns -ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 DROP STATISTICS a; -ALTER TABLE t1 DROP STATISTICS IF EXISTS a; -ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; -ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } -DROP TABLE t1; +DROP TABLE tab; From c390ecdb4df7e090ea94f10adda47dd73864c71c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 10:11:04 +0000 Subject: [PATCH 179/273] Rename 02864_statistics_operate --> 02864_statistics_ddl --- .../02864_statistics_ddl.reference | 31 ++++++++++ 
.../0_stateless/02864_statistics_ddl.sql | 59 +++++++++++++++++++ .../02864_statistics_operate.reference | 31 ---------- .../0_stateless/02864_statistics_operate.sql | 57 ------------------ 4 files changed, 90 insertions(+), 88 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.reference create mode 100644 tests/queries/0_stateless/02864_statistics_ddl.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_operate.sql diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference new file mode 100644 index 00000000000..a7ff5caa0b0 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -0,0 +1,31 @@ +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` 
Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql new file mode 100644 index 00000000000..fe612efe2ac --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -0,0 +1,59 @@ +-- Tests that various DDL statements create/drop/materialize statistics + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, 
'__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02864_statistics_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference deleted file mode 100644 index 6398a9bd000..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.reference +++ /dev/null @@ -1,31 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS 
min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 diff --git a/tests/queries/0_stateless/02864_statistics_operate.sql b/tests/queries/0_stateless/02864_statistics_operate.sql deleted file mode 100644 index bf69c11bc91..00000000000 --- a/tests/queries/0_stateless/02864_statistics_operate.sql +++ /dev/null @@ -1,57 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; -SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; - -ALTER TABLE t1 DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 ADD 
STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE t1; - -ALTER TABLE t1 MATERIALIZE STATISTICS a, b; -INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE b < 10 and a < 10; - -ALTER TABLE t1 RENAME COLUMN b TO c; -SHOW CREATE TABLE t1; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM t1 WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS t1; From 4f0916caa5ee07c3c612641782288ee42adfd92a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 30 Jun 2024 09:56:38 +0000 Subject: [PATCH 180/273] Rename test 03164_materialize_statistics --> 02864_statistics_materialize_in_merge Consistency with existing statistics tests - 02864_statistics_operate - 02864_statistics_exception - 02864_statistics_uniq --- ...statistics_materialize_in_merge.reference} | 0 .../02864_statistics_materialize_in_merge.sql | 52 +++++++++++++++++++ .../03164_materialize_statistics.sql | 49 ----------------- 3 files changed, 52 insertions(+), 49 deletions(-) rename tests/queries/0_stateless/{03164_materialize_statistics.reference => 02864_statistics_materialize_in_merge.reference} (100%) create mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql 
delete mode 100644 tests/queries/0_stateless/03164_materialize_statistics.sql diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference similarity index 100% rename from tests/queries/0_stateless/03164_materialize_statistics.reference rename to tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..3e15ec1148e --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. 
+ +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql deleted file mode 100644 index 43c5724dd59..00000000000 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ /dev/null @@ -1,49 +0,0 @@ -DROP TABLE IF EXISTS t_statistics_materialize; - -SET allow_experimental_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET materialize_statistics_on_insert = 0; - -CREATE TABLE t_statistics_materialize -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with 
statistics. - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE t_statistics_materialize FINAL; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE t_statistics_materialize; -SET mutations_sync = 2; - -INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE t_statistics_materialize; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' - AND type = 'QueryFinish'; From 5f53a73457f0851f78b82e2d5559f4b654eaf6e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 Jul 2024 12:05:49 +0200 Subject: [PATCH 181/273] Fix config merging for from_env with replace overrides Without this patch new test fails with: Exception: Failed to preprocess config '/etc/clickhouse-server/config.xml': Exception: Element has value and does not have 'replace' attribute, can't process from_env substitution. 
Stack trace: Signed-off-by: Azat Khuzhin --- src/Common/Config/ConfigProcessor.cpp | 1 - .../configs/000-server_overrides.xml | 3 ++ ...subst.xml => 000-users_with_env_subst.xml} | 0 .../configs/010-server_with_env_subst.xml | 3 ++ .../test_config_substitutions/test.py | 30 ++++++++++++++++--- 5 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_config_substitutions/configs/000-server_overrides.xml rename tests/integration/test_config_substitutions/configs/{000-config_with_env_subst.xml => 000-users_with_env_subst.xml} (100%) create mode 100644 tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c9832e8efd5..67d6036aa51 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -316,7 +316,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } diff --git a/tests/integration/test_config_substitutions/configs/000-server_overrides.xml b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml new file mode 100644 index 00000000000..9335f663d68 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/000-server_overrides.xml @@ -0,0 +1,3 @@ + + 10000 + diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml similarity index 100% rename from tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml rename to tests/integration/test_config_substitutions/configs/000-users_with_env_subst.xml diff --git a/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml 
b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml new file mode 100644 index 00000000000..ea91f066a21 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/010-server_with_env_subst.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index faceab6fbcd..124dbcaedf7 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -39,9 +39,13 @@ node6 = cluster.add_instance( node7 = cluster.add_instance( "node7", user_configs=[ - "configs/000-config_with_env_subst.xml", + "configs/000-users_with_env_subst.xml", "configs/010-env_subst_override.xml", ], + main_configs=[ + "configs/000-server_overrides.xml", + "configs/010-server_with_env_subst.xml", + ], env_variables={ # overridden with 424242 "MAX_QUERY_SIZE": "121212", @@ -126,9 +130,9 @@ def test_config(start_cluster): ) -def test_config_invalid_overrides(start_cluster): +def test_config_from_env_overrides(start_cluster): node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -156,7 +160,7 @@ def test_config_invalid_overrides(start_cluster): ): node7.query("SYSTEM RELOAD CONFIG") node7.replace_config( - "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + "/etc/clickhouse-server/users.d/000-users_with_env_subst.xml", """ @@ -181,6 +185,24 @@ def test_config_invalid_overrides(start_cluster): node7.query("SYSTEM RELOAD CONFIG") +def test_config_merge_from_env_overrides(start_cluster): + assert ( + node7.query( + "SELECT value FROM system.server_settings WHERE name='max_thread_pool_size'" + ) + == "10000\n" + ) + node7.replace_config( + "/etc/clickhouse-server/config.d/010-server_with_env_subst.xml", + """ + + 9000 + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") + + def 
test_include_config(start_cluster): # assert node4.query("select 1") From 6ccb26b1aa6c1d79e0388a72afd4b0f9edb2ea1c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 10:26:19 +0000 Subject: [PATCH 182/273] Switch to a virtual interface to get rid of static_pointer_cast --- src/Storages/Statistics/Statistics.cpp | 42 ++++++++++++++++----- src/Storages/Statistics/Statistics.h | 9 +++++ src/Storages/Statistics/StatisticsTDigest.h | 4 +- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- src/Storages/Statistics/StatisticsUniq.h | 2 +- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index c454adccc06..28e75c6d244 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -41,10 +41,35 @@ void ColumnStatistics::update(const ColumnPtr & column) stat.second->update(column); } +UInt64 IStatistics::estimateCardinality() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateEqual(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); +} + +Float64 IStatistics::estimateLess(Float64 /*val*/) const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); +} + +/// ------------------------------------- +/// Implementation of the estimation: +/// Note: Each statistics object supports certain types predicates natively, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. 
+/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics +/// object that supports it natively. + Float64 ColumnStatistics::estimateLess(Float64 val) const { if (stats.contains(StatisticsType::TDigest)) - return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -57,14 +82,9 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const { if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { - auto statistics_uniq = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); - /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) - /// for every bucket. - if (statistics_uniq->getCardinality() < 2048) - { - auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); - return tdigest_static->estimateEqual(val); - } + /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. 
+ if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) + return stats.at(StatisticsType::TDigest)->estimateEqual(val); } if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; @@ -72,6 +92,8 @@ Float64 ColumnStatistics::estimateEqual(Float64 val) const return rows * ConditionSelectivityEstimator::default_good_cond_factor; } +/// ------------------------------------- + void ColumnStatistics::serialize(WriteBuffer & buf) { writeIntBinary(V0, buf); @@ -81,7 +103,7 @@ void ColumnStatistics::serialize(WriteBuffer & buf) stat_types_mask |= 1 << UInt8(type); writeIntBinary(stat_types_mask, buf); - /// store the column row count as it is always useful + /// as the column row count is always useful, save it in any case writeIntBinary(rows, buf); /// write the actual statistics object diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 4af7c423257..d4364075d1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -28,6 +28,15 @@ public: virtual void serialize(WriteBuffer & buf) = 0; virtual void deserialize(ReadBuffer & buf) = 0; + /// Estimate the cardinality of the column. + /// Throws if the statistics object is not able to do a meaningful estimation. + virtual UInt64 estimateCardinality() const; + + /// Per-value estimations. + /// Throws if the statistics object is not able to do a meaningful estimation. 
+ virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column + virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column + protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index f391d0b17e6..d3a3bf115ee 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -16,8 +16,8 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + Float64 estimateLess(Float64 val) const override; + Float64 estimateEqual(Float64 val) const override; private: QuantileTDigest t_digest; diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 4e24e5f0e96..bf9a40ea8cb 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -44,7 +44,7 @@ void StatisticsUniq::deserialize(ReadBuffer & buf) collector->deserialize(data, buf); } -UInt64 StatisticsUniq::getCardinality() +UInt64 StatisticsUniq::estimateCardinality() const { auto column = DataTypeUInt64().createColumn(); collector->insertResultInto(data, *column, nullptr); diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 1c521fa9984..5290585bd94 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -18,7 +18,7 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - UInt64 getCardinality(); + UInt64 estimateCardinality() const override; private: std::unique_ptr arena; From 10d48afc20d23043c8e89bd804259b19247dc03c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 13:16:01 +0200 Subject: [PATCH 183/273] token_info is defined 
always --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 3677f5b02ab..dedb4a9ddae 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -297,16 +297,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) String block_dedup_token; auto token_info = chunk.getChunkInfos().get(); - if constexpr (!async_insert) - { - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - } + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); From c08293026570e00f7f4332d7b0d4b3eb646db1d5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 13:43:27 +0200 Subject: [PATCH 184/273] rename to buildPreAndSinkChains --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +++--- src/Interpreters/InterpreterInsertQuery.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 5e8b8601f08..2becea61b3a 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -397,7 +397,7 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> 
InterpreterInsertQuery::buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) { chassert(presink_streams > 0); chassert(sink_streams > 0); @@ -612,7 +612,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & sink_streams_size = 1; } - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( presink_streams_size, sink_streams_size, table, metadata_snapshot, query_sample_block); @@ -673,7 +673,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query Chain chain; { - auto [presink_chains, sink_chains] = buildPreAndSyncChains( + auto [presink_chains, sink_chains] = buildPreAndSinkChains( 1, 1, table, metadata_snapshot, query_sample_block); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index b06bb9a3db2..894c7c42144 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -79,7 +79,7 @@ private: std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSyncChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); From 8777363670dcc8775037f7104e90eea05f0fa0b2 Mon Sep 17 00:00:00 2001 
From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:45 +0200 Subject: [PATCH 185/273] Update src/Processors/Transforms/DeduplicationTokenTransforms.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Processors/Transforms/DeduplicationTokenTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h index 94287dc4487..d6aff9e1370 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -142,7 +142,7 @@ namespace DeduplicationToken String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with heshes from the parts. + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. 
// But if there is some table with different engine, we still need to define the source of the data in deduplication token // We use that transform to define the source as a hash of entire block in deduplication token void transform(Chunk & chunk) override; From 5c88d5b48ad75cbe3f8e15e428d8d24380c23943 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:47:55 +0200 Subject: [PATCH 186/273] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2becea61b3a..15b9b155d54 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -828,7 +828,13 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized, false, false, false); + return std::make_unique( + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, + /* no_destination */false, + /* async_insert */false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } From aee1289f2d89b83d5f5255792fe73784ec824ca1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:04 +0200 Subject: [PATCH 187/273] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 15b9b155d54..2581a368272 
100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -791,12 +791,8 @@ BlockIO InterpreterInsertQuery::execute() res.pipeline.addStorageHolder(table); - StoragePtr inner_table; if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - - if (inner_table) - res.pipeline.addStorageHolder(inner_table); + res.pipeline.addStorageHolder(mv->getTargetTable()); return res; } From f0aa006461dd4118dada9c6262d53fc703d0af82 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:32 +0200 Subject: [PATCH 188/273] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2581a368272..f9b57f530f0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -580,7 +580,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & if (!settings.insert_deduplication_token.value.empty()) { - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(settings.insert_deduplication_token.value, in_header); }); From c4207e9a6ef7c8ccd5e1c837535268e2f9f04b70 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:48:45 +0200 Subject: [PATCH 189/273] Update src/Interpreters/InterpreterInsertQuery.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f9b57f530f0..333da81ced0 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -585,7 +585,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & return std::make_shared(settings.insert_deduplication_token.value, in_header); }); - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header); }); From 913e97b1a5560536bfcdc722812a53395370a435 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:05:06 +0200 Subject: [PATCH 190/273] work with review comments --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 6 ++++-- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 6 ++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 333da81ced0..d7f778f6678 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -674,7 +674,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query { auto [presink_chains, sink_chains] = buildPreAndSinkChains( - 1, 1, + /* presink_streams */1, /* sink_streams */1, table, metadata_snapshot, query_sample_block); chain = std::move(presink_chains.front()); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 4a1163d2317..d8cfce1ca99 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -95,6 +95,8 @@ void MergeTreeSink::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); + const bool 
need_to_define_dedup_token = !token_info->isDefined(); + String block_dedup_token; if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -123,7 +125,7 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -166,7 +168,7 @@ void MergeTreeSink::consume(Chunk & chunk) }); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index dedb4a9ddae..bbae054fbed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -302,6 +302,8 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", storage.getStorageID().getNameForLogs()); + const bool need_to_define_dedup_token = !token_info->isDefined(); + if (token_info->isDefined()) block_dedup_token = token_info->getToken(); @@ -368,7 +370,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { chassert(temp_part.part); const auto hash_value = temp_part.part->getPartBlockIDHash(); @@ -419,7 +421,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (!token_info->isDefined()) + if (need_to_define_dedup_token) { token_info->finishChunkHashes(); } From 6d3d33638ac45b8d7e6fd2d788335a40539548e8 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:22:33 +0200 Subject: [PATCH 191/273] Fix lock-order-inversion in DatabaseCatalog --- 
src/Interpreters/DatabaseCatalog.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..841decf29c5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -274,10 +274,12 @@ void DatabaseCatalog::shutdownImpl() database->shutdown(); } + TablesMarkedAsDropped tables_marked_dropped_to_destroy; { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.clear(); + tables_marked_dropped.swap(tables_marked_dropped_to_destroy); } + tables_marked_dropped_to_destroy.clear(); std::lock_guard lock(databases_mutex); for (const auto & db : databases) From 2a1c13b070fbb3ad38ad8820f004b8687dab9425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Jul 2024 14:25:31 +0200 Subject: [PATCH 192/273] Add comment about the changes --- base/base/itoa.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index c17a2bfd999..60231507c96 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -119,6 +119,13 @@ inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i) if (n < U(1e2)) { + /// This is changed from the original jeaiii implementation + /// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions + /// and a memory access (no need to read digits.fd[n]) + /// This is not true for pure random numbers, but that's not the common use case of a database + /// Original jeaii code + // *reinterpret_cast(b) = digits.fd[n]; + // return n < 10 ? b + 1 : b + 2; return n < 10 ? 
outOneDigit(b, n) : outTwoDigits(b, n); } if (n < UInt32(1e6)) From 3a09000e4448c921cc9faefd387ef8b383a89c1a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 14:33:21 +0200 Subject: [PATCH 193/273] remove trailing whitespaces --- src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d7f778f6678..2cbfc55d008 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -825,10 +825,10 @@ void registerInterpreterInsertQuery(InterpreterFactory & factory) auto create_fn = [] (const InterpreterFactory::Arguments & args) { return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, /* no_destination */false, /* async_insert */false); }; From 4e6bdb15b0c58bd6d3457f21b2f9a493b698904e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 14:35:17 +0200 Subject: [PATCH 194/273] Azure policy --- docker/test/stress/run.sh | 40 +++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 96f8ecb2fab..323944591b1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -110,6 +110,15 @@ start_server clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="s3_cache" +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + TEMP_POLICY="azure_cache" +else + TEMP_POLICY="default" +fi + + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 
FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, @@ -135,7 +144,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, @@ -161,7 +170,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, 
StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, @@ -195,7 +204,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) - SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -216,13 +225,24 @@ export ZOOKEEPER_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp -mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + # But we still need default disk because some tables loaded only into it + sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \ + | sed "s|
<main><disk>azure</disk></main>|<main><disk>azure</disk></main><default><disk>default</disk></default>|" \ + > /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp + mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml + sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml +fi + sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ | sed "s|trace|test|" \ From 97e2c8c7d220aa073d3bbbe0f1a9624ab28a2076 Mon Sep 17 00:00:00 2001 From: Mikhail Gorshkov Date: Wed, 3 Jul 2024 12:56:20 +0000 Subject: [PATCH 195/273] PR review follow-up --- src/Functions/FunctionsRound.h | 88 ++-------------------------------- 1 file changed, 4 insertions(+), 84 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index d43f7f264b4..357b8c03044 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -586,62 +586,8 @@ struct Dispatcher const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_res = col_res->getData(); - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - vec_res.resize(1); - auto scale_arg = (scale_col == nullptr) ?
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - if (scale_arg == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else if (scale_arg > 0) - { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[0]); - } - } - /// Non-const scale argument: - else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale_data = scale_col_typed->getData(); - const size_t rows = scale_data.size(); - - vec_res.resize(rows); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale_data[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - if (raw_scale == 0) - { - size_t scale = 1; - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else if (raw_scale > 0) - { - size_t scale = intExp10(raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - else - { - size_t scale = intExp10(-raw_scale); - FunctionRoundingImpl::applyOne(value_data, scale, vec_res[i]); - } - } - } - return col_res; + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } @@ -697,34 +643,8 @@ public: const auto * value_col_typed_const = checkAndGetColumnConst>(value_col); if (value_col_typed_const) { - auto col = assert_cast*>(value_col_typed_const->getDataColumnPtr().get()); - const auto & value_data = value_col_typed_const->template getValue(); - // Const scale argument: - if (scale_col == nullptr || isColumnConst(*scale_col)) - { - auto col_res = ColumnDecimal::create(1, col->getScale()); - auto scale_arg = scale_col == nullptr ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); - DecimalRoundingImpl::applyOne(value_data, col->getScale(), reinterpret_cast::NativeT&>(col_res->getElement(0)), scale_arg); - return col_res; - } - /// Non-const scale argument: - if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) - { - const auto & scale = scale_col_typed->getData(); - const size_t rows = scale.size(); - auto col_res = ColumnDecimal::create(rows, col->getScale()); - - for (size_t i = 0; i < rows; ++i) - { - Int64 scale64 = scale[i]; - validateScale(scale64); - Scale raw_scale = scale64; - - DecimalRoundingImpl::applyOne(value_data, col->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); - } - return col_res; - } + auto value_col_full = value_col_typed_const->convertToFullColumn(); + return apply(value_col_full.get(), scale_col); } return nullptr; } From 88601ae86914ea152fc0ae8e5fd74fe030598b18 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jul 2024 15:25:12 +0200 Subject: [PATCH 196/273] avoid conflicts in SettingsChangesHistory --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index 56d6fecf4b8..dd94a48f8e7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ contrib/* linguist-vendored *.h linguist-language=C++ tests/queries/0_stateless/data_json/* binary tests/queries/0_stateless/*.reference -crlf +src/Core/SettingsChangesHistory.cpp merge=union From 5fd36059e48c4377ea526343e7272009d2ccaf2a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 15:28:01 +0200 Subject: [PATCH 197/273] Try disabling background threads --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..cc5a391676f 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,7 @@ if (OS_LINUX) # avoid 
spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() From 1c14a458e72bc9554e851c20ffff9bfa03e5446e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:11:02 +0200 Subject: [PATCH 198/273] Add profile events for regex cache --- src/Common/ProfileEvents.cpp | 4 ++++ src/Functions/Regexps.h | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index d98373b6c55..cd5f67fdff2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,6 +239,10 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpCreated, "Compiled regular expressions. 
Identical regular expressions compiled just once and cached forever.") \ + M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ + M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index b6bd463212f..fff21fdb941 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -24,6 +24,10 @@ namespace ProfileEvents { extern const Event RegexpCreated; +extern const Event RegexpGlobalCacheHit; +extern const Event RegexpGlobalCacheMiss; +extern const Event RegexpLocalCacheHit; +extern const Event RegexpLocalCacheMiss; } @@ -72,18 +76,28 @@ public: Bucket & bucket = known_regexps[hasher(pattern) % CACHE_SIZE]; if (bucket.regexp == nullptr) [[unlikely]] + { /// insert new entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } else + { if (pattern != bucket.pattern) + { + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry bucket = {pattern, std::make_shared(createRegexp(pattern))}; + } + else + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheHit); + } return bucket.regexp; } private: - constexpr static size_t CACHE_SIZE = 100; /// collision probability + constexpr static size_t CACHE_SIZE = 1000; /// collision probability std::hash hasher; struct Bucket @@ -322,9 +336,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, 
edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else + { if (bucket.patterns != str_patterns || bucket.edit_distance != edit_distance) { /// replace existing entry @@ -333,8 +349,12 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } + else + ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + } return bucket.regexps; } From c96e3c6d1a9edb1e0d22cf818994f7647fa4a9d2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Jul 2024 16:21:36 +0200 Subject: [PATCH 199/273] Fix which I don't understand --- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 11 +++++++++-- ...3198_non_adaptive_granularity_no_errors.reference | 2 ++ .../03198_non_adaptive_granularity_no_errors.sql | 12 ++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference create mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 9666f310d3d..a69d21de8e7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -560,7 +560,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai { /// With fixed granularity we can have last mark with less rows than granularity const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); - if (!data_part->index_granularity_info.fixed_index_granularity || !is_last_mark) + if (!index_granularity_info.fixed_index_granularity || !is_last_mark) throw Exception( 
ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" @@ -785,7 +785,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// We can adjust marks only if we computed granularity for blocks. /// Otherwise we cannot change granularity because it will differ from /// other columns -// if (compute_granularity && settings.can_use_adaptive_granularity) + if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -824,7 +824,14 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// Without offset rows_written_in_last_mark = 0; } + + if (compute_granularity) + { + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } } + } } diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference new file mode 100644 index 00000000000..fcd78da1283 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference @@ -0,0 +1,2 @@ +1000000 +1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql new file mode 100644 index 00000000000..25798ef6d33 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS data_02051__fuzz_24; + +CREATE TABLE data_02051__fuzz_24 (`key` Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +detach table data_02051__fuzz_24; +attach table 
data_02051__fuzz_24; + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +DROP TABLE data_02051__fuzz_24; From 8319d2579789aee45a8e02cac0131e7dc348eedd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 14:43:47 +0000 Subject: [PATCH 200/273] Minor updates --- src/Common/ProfileEvents.cpp | 11 ++++++----- src/Functions/Regexps.h | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index cd5f67fdff2..2e3984f8f10 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -238,11 +238,12 @@ \ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ - M(RegexpCreated, "Compiled regular expressions. Identical regular expressions compiled just once and cached forever.") \ - M(RegexpGlobalCacheHit, "Number of times we fetched compiled regular expression from the global cache.") \ - M(RegexpGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression from the global cache.") \ - M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from the local cache.") \ - M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from the local cache.") \ + M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ + M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpLocalCacheHit, "Number of times we 
fetched compiled regular expression from a local cache.") \ + M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ + \ M(ContextLock, "Number of times the lock of Context was acquired or tried to acquire. This is global lock.") \ M(ContextLockWaitMicroseconds, "Context lock wait time in microseconds") \ \ diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index fff21fdb941..b317d786fab 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -23,11 +23,11 @@ namespace ProfileEvents { -extern const Event RegexpCreated; -extern const Event RegexpGlobalCacheHit; -extern const Event RegexpGlobalCacheMiss; -extern const Event RegexpLocalCacheHit; -extern const Event RegexpLocalCacheMiss; + extern const Event RegexpWithMultipleNeedlesCreated; + extern const Event RegexpWithMultipleNeedlesGlobalCacheHit; + extern const Event RegexpWithMultipleNeedlesGlobalCacheMiss; + extern const Event RegexpLocalCacheHit; + extern const Event RegexpLocalCacheMiss; } @@ -85,8 +85,8 @@ public: { if (pattern != bucket.pattern) { - ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); /// replace existing entry + ProfileEvents::increment(ProfileEvents::RegexpLocalCacheMiss); bucket = {pattern, std::make_shared(createRegexp(pattern))}; } else @@ -97,7 +97,7 @@ public: } private: - constexpr static size_t CACHE_SIZE = 1000; /// collision probability + constexpr static size_t CACHE_SIZE = 1'000; /// collision probability std::hash hasher; struct Bucket @@ -258,7 +258,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } - ProfileEvents::increment(ProfileEvents::RegexpCreated); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesCreated); /// We allocate the scratch space only once, then copy it across multiple threads with 
hs_clone_scratch /// function which is faster than allocating scratch space each time in each thread. @@ -336,7 +336,7 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else @@ -349,11 +349,11 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector(str_patterns, edit_distance); }); - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheMiss); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheMiss); bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps}; } else - ProfileEvents::increment(ProfileEvents::RegexpGlobalCacheHit); + ProfileEvents::increment(ProfileEvents::RegexpWithMultipleNeedlesGlobalCacheHit); } return bucket.regexps; From 07f51e02eda1c8194da28317e4d8452a5c52fc40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 16:54:09 +0200 Subject: [PATCH 201/273] Reuse some checks --- programs/CMakeLists.txt | 4 +- programs/keeper/keeper_main.cpp | 266 +----------------------------- programs/main.cpp | 268 +------------------------------ src/Common/Coverage.cpp | 45 ++++++ src/Common/Coverage.h | 5 + src/Common/EnvironmentChecks.cpp | 234 +++++++++++++++++++++++++++ src/Common/EnvironmentChecks.h | 5 + 7 files changed, 297 insertions(+), 530 deletions(-) create mode 100644 src/Common/Coverage.cpp create mode 100644 src/Common/Coverage.h create mode 100644 src/Common/EnvironmentChecks.cpp create mode 100644 src/Common/EnvironmentChecks.h diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index b06290ae352..6b3a0b16624 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -73,9 +73,9 @@ else() endif() if (ENABLE_CLICKHOUSE_KEEPER) - message(STATUS "ClickHouse keeper mode: 
ON") + message(STATUS "ClickHouse Keeper: ON") else() - message(STATUS "ClickHouse keeper mode: OFF") + message(STATUS "ClickHouse Keeper: OFF") endif() if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) diff --git a/programs/keeper/keeper_main.cpp b/programs/keeper/keeper_main.cpp index ec9b84ce94b..a240f9699f2 100644 --- a/programs/keeper/keeper_main.cpp +++ b/programs/keeper/keeper_main.cpp @@ -1,11 +1,9 @@ -#include #include #include #include #include #include -#include #include #include /// pair @@ -14,6 +12,9 @@ #include "config.h" #include "config_tools.h" +#include +#include + #include #include #include @@ -59,270 +60,9 @@ int printHelp(int, char **) return -1; } - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } } -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. 
-void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. 
-void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". - "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). 
- "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . - - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. 
- - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - -} - bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) diff --git a/programs/main.cpp b/programs/main.cpp index 61e2bc18ed7..eecbe3a6876 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -1,5 +1,3 @@ -#include -#include #include #include @@ -7,7 +5,6 @@ #include #include #include -#include #include #include /// pair @@ -16,6 +13,9 @@ #include "config.h" #include "config_tools.h" + +#include +#include #include #include #include @@ -119,268 +119,6 @@ std::pair clickhouse_short_names[] = {"chc", "client"}, }; - -enum class InstructionFail : uint8_t -{ - NONE = 0, - SSE3 = 1, - SSSE3 = 2, - SSE4_1 = 3, - SSE4_2 = 4, - POPCNT = 5, - AVX = 6, - AVX2 = 7, - AVX512 = 8 -}; - -auto instructionFailToString(InstructionFail fail) -{ - switch (fail) - { -#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case 
InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); -#undef ret - } -} - - -sigjmp_buf jmpbuf; - -[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) -{ - siglongjmp(jmpbuf, 1); -} - -/// Check if necessary SSE extensions are available by trying to execute some sse instructions. -/// If instruction is unavailable, SIGILL will be sent by kernel. -void checkRequiredInstructionsImpl(volatile InstructionFail & fail) -{ -#if defined(__SSE3__) - fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSSE3__) - fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); - -#endif - -#if defined(__SSE4_1__) - fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); -#endif - -#if defined(__SSE4_2__) - fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); -#endif - - /// Defined by -msse4.2 -#if defined(__POPCNT__) - fail = InstructionFail::POPCNT; - { - uint64_t a = 0; - uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); - } -#endif - -#if defined(__AVX__) - fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX2__) - fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); -#endif - -#if defined(__AVX512__) - fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); -#endif - - fail = InstructionFail::NONE; -} - -/// Macros to avoid using strlen(), since it may fail if SSE is not supported. 
-#define writeError(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ - _Exit(1); \ - } while (false) - -/// Check SSE and others instructions availability. Calls exit on fail. -/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void checkRequiredInstructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; - sa.sa_sigaction = sigIllCheckHandler; - sa.sa_flags = SA_SIGINFO; - auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { - /// You may wonder about strlen. - /// Typical implementation of strlen is using SSE4.2 or AVX2. - /// But this is not the case because it's compiler builtin and is executed at compile time. - - writeError("Can not set signal handler\n"); - _Exit(1); - } - - volatile InstructionFail fail = InstructionFail::NONE; - - if (sigsetjmp(jmpbuf, 1)) - { - writeError("Instruction check fail. The CPU does not support "); - if (!std::apply(writeRetry, instructionFailToString(fail))) - _Exit(1); - writeError(" instruction set.\n"); - _Exit(1); - } - - checkRequiredInstructionsImpl(fail); - - if (sigaction(signal, &sa_old, nullptr)) - { - writeError("Can not set signal handler\n"); - _Exit(1); - } -} - -struct Checker -{ - Checker() - { - checkRequiredInstructions(); - } -} checker -#ifndef OS_DARWIN - __attribute__((init_priority(101))) /// Run before other static initializers. -#endif -; - - -#if !defined(USE_MUSL) -/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables(char ** argv) -{ - std::initializer_list harmful_env_variables = { - /// The list is a selection from "man ld-linux". 
- "LD_PRELOAD", - "LD_LIBRARY_PATH", - "LD_ORIGIN_PATH", - "LD_AUDIT", - "LD_DYNAMIC_WEAK", - /// The list is a selection from "man dyld" (osx). - "DYLD_LIBRARY_PATH", - "DYLD_FALLBACK_LIBRARY_PATH", - "DYLD_VERSIONED_LIBRARY_PATH", - "DYLD_INSERT_LIBRARIES", - }; - - bool require_reexec = false; - for (const auto * var : harmful_env_variables) - { - if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) - { - /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful - if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently - { - fmt::print(stderr, "Cannot override {} environment variable", var); - _exit(1); - } - require_reexec = true; - } - } - - if (require_reexec) - { - /// Use execvp() over execv() to search in PATH. - /// - /// This should be safe, since: - /// - if argv[0] is relative path - it is OK - /// - if argv[0] has only basename, the it will search in PATH, like shell will do. - /// - /// Also note, that this (search in PATH) because there is no easy and - /// portable way to get absolute path of argv[0]. - /// - on linux there is /proc/self/exec and AT_EXECFN - /// - but on other OSes there is no such thing (especially on OSX). - /// - /// And since static linking will be done someday anyway, - /// let's not pollute the code base with special cases. - int error = execvp(argv[0], argv); - _exit(error); - } -} -#endif - - -#if defined(SANITIZE_COVERAGE) -__attribute__((no_sanitize("coverage"))) -void dumpCoverage() -{ - /// A user can request to dump the coverage information into files at exit. - /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, - /// that cannot introspect it with SQL functions at runtime. - - /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' - /// containing the list of addresses of covered . 
- - /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. - - if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) - { - auto dump = [](const std::string & name, auto span) - { - /// Write only non-zeros. - std::vector data; - data.reserve(span.size()); - for (auto addr : span) - if (addr) - data.push_back(addr); - - int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); - if (-1 == fd) - { - writeError("Cannot open a file to write the coverage data\n"); - } - else - { - if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) - writeError("Cannot write the coverage data to a file\n"); - if (0 != ::close(fd)) - writeError("Cannot close the file with coverage data\n"); - } - }; - - dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); - } -} -#endif - } bool isClickhouseApp(std::string_view app_suffix, std::vector & argv) diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp new file mode 100644 index 00000000000..fa8da1f9e15 --- /dev/null +++ b/src/Common/Coverage.cpp @@ -0,0 +1,45 @@ +#include + +#if defined(SANITIZE_COVERAGE) +__attribute__((no_sanitize("coverage"))) +void dumpCoverage() +{ + /// A user can request to dump the coverage information into files at exit. + /// This is useful for non-server applications such as clickhouse-format or clickhouse-client, + /// that cannot introspect it with SQL functions at runtime. + + /// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid' + /// containing the list of addresses of covered . + + /// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header. 
+ + if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe) + { + auto dump = [](const std::string & name, auto span) + { + /// Write only non-zeros. + std::vector data; + data.reserve(span.size()); + for (auto addr : span) + if (addr) + data.push_back(addr); + + int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400); + if (-1 == fd) + { + writeError("Cannot open a file to write the coverage data\n"); + } + else + { + if (!writeRetry(fd, reinterpret_cast(data.data()), data.size() * sizeof(data[0]))) + writeError("Cannot write the coverage data to a file\n"); + if (0 != ::close(fd)) + writeError("Cannot close the file with coverage data\n"); + } + }; + + dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage()); + } +} +#endif + diff --git a/src/Common/Coverage.h b/src/Common/Coverage.h new file mode 100644 index 00000000000..aa6dd2825ed --- /dev/null +++ b/src/Common/Coverage.h @@ -0,0 +1,5 @@ +#pragma once + +#if defined(SANITIZE_COVERAGE) +void dumpCoverage(); +#endif diff --git a/src/Common/EnvironmentChecks.cpp b/src/Common/EnvironmentChecks.cpp new file mode 100644 index 00000000000..d69e8cbaa3d --- /dev/null +++ b/src/Common/EnvironmentChecks.cpp @@ -0,0 +1,234 @@ +#include +#include + +#include + +#include +#include +#include + +#include + +#include + +namespace +{ + +enum class InstructionFail : uint8_t +{ + NONE = 0, + SSE3 = 1, + SSSE3 = 2, + SSE4_1 = 3, + SSE4_2 = 4, + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 +}; + +auto instructionFailToString(InstructionFail fail) +{ + switch (fail) + { +#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1) + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); 
+ case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); +#undef ret + } +} + + +sigjmp_buf jmpbuf; + +[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *) +{ + siglongjmp(jmpbuf, 1); +} + +/// Check if necessary SSE extensions are available by trying to execute some sse instructions. +/// If instruction is unavailable, SIGILL will be sent by kernel. +void checkRequiredInstructionsImpl(volatile InstructionFail & fail) +{ +#if defined(__SSE3__) + fail = InstructionFail::SSE3; + __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSSE3__) + fail = InstructionFail::SSSE3; + __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + +#endif + +#if defined(__SSE4_1__) + fail = InstructionFail::SSE4_1; + __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); +#endif + +#if defined(__SSE4_2__) + fail = InstructionFail::SSE4_2; + __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); +#endif + + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + +#if defined(__AVX__) + fail = InstructionFail::AVX; + __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX2__) + fail = InstructionFail::AVX2; + __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); +#endif + +#if defined(__AVX512__) + fail = InstructionFail::AVX512; + __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); +#endif + + fail = InstructionFail::NONE; +} + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. +#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + +/// Check SSE and others instructions availability. Calls exit on fail. 
+/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. +void checkRequiredInstructions() +{ + struct sigaction sa{}; + struct sigaction sa_old{}; + sa.sa_sigaction = sigIllCheckHandler; + sa.sa_flags = SA_SIGINFO; + auto signal = SIGILL; + if (sigemptyset(&sa.sa_mask) != 0 + || sigaddset(&sa.sa_mask, signal) != 0 + || sigaction(signal, &sa, &sa_old) != 0) + { + /// You may wonder about strlen. + /// Typical implementation of strlen is using SSE4.2 or AVX2. + /// But this is not the case because it's compiler builtin and is executed at compile time. + + writeError("Can not set signal handler\n"); + _Exit(1); + } + + volatile InstructionFail fail = InstructionFail::NONE; + + if (sigsetjmp(jmpbuf, 1)) + { + writeError("Instruction check fail. The CPU does not support "); + if (!std::apply(writeRetry, instructionFailToString(fail))) + _Exit(1); + writeError(" instruction set.\n"); + _Exit(1); + } + + checkRequiredInstructionsImpl(fail); + + if (sigaction(signal, &sa_old, nullptr)) + { + writeError("Can not set signal handler\n"); + _Exit(1); + } +} + +struct Checker +{ + Checker() + { + checkRequiredInstructions(); + } +} checker +#ifndef OS_DARWIN + __attribute__((init_priority(101))) /// Run before other static initializers. +#endif +; + +} + + +#if !defined(USE_MUSL) +/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. +void checkHarmfulEnvironmentVariables(char ** argv) +{ + std::initializer_list harmful_env_variables = { + /// The list is a selection from "man ld-linux". + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "LD_ORIGIN_PATH", + "LD_AUDIT", + "LD_DYNAMIC_WEAK", + /// The list is a selection from "man dyld" (osx). 
+ "DYLD_LIBRARY_PATH", + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_VERSIONED_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + }; + + bool require_reexec = false; + for (const auto * var : harmful_env_variables) + { + if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe) + { + /// NOTE: setenv() is used over unsetenv() since unsetenv() marked as harmful + if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; + } + } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is relative path - it is OK + /// - if argv[0] has only basename, the it will search in PATH, like shell will do. + /// + /// Also note, that this (search in PATH) because there is no easy and + /// portable way to get absolute path of argv[0]. + /// - on linux there is /proc/self/exec and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. 
+ int error = execvp(argv[0], argv); + _exit(error); + } +} +#endif diff --git a/src/Common/EnvironmentChecks.h b/src/Common/EnvironmentChecks.h new file mode 100644 index 00000000000..6d355a69ff9 --- /dev/null +++ b/src/Common/EnvironmentChecks.h @@ -0,0 +1,5 @@ +#pragma once + +#if !defined(USE_MUSL) +void checkHarmfulEnvironmentVariables(char ** argv); +#endif From 39a371b27de3f9f1ee94f6da0ce1b78ab527d6f0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:07:31 +0000 Subject: [PATCH 202/273] Bump vectorscan --- contrib/vectorscan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/vectorscan b/contrib/vectorscan index 38431d11178..4918f81ea3d 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 38431d111781843741a781a57a6381a527d900a4 +Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 From 9a023744a5825d349c1027e1c1d425956a3bc87f Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Jul 2024 11:13:39 -0400 Subject: [PATCH 203/273] fix build --- src/Storages/StorageFuzzQuery.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp index 229ae1af7c1..6e8f425f8dc 100644 --- a/src/Storages/StorageFuzzQuery.cpp +++ b/src/Storages/StorageFuzzQuery.cpp @@ -1,14 +1,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include @@ -41,10 +39,8 @@ ColumnPtr FuzzQuerySource::createColumn() fuzzer.fuzzMain(new_query); auto fuzzed_text = new_query->formatForErrorMessage(); - WriteBufferFromOwnString out; - formatAST(*new_query, out, false); - auto data = out.str(); - size_t data_len = data.size(); + if (base_before_fuzz == fuzzed_text) + continue; /// AST is too long, will start from the original query. 
if (config.max_query_length > 500) @@ -53,12 +49,12 @@ ColumnPtr FuzzQuerySource::createColumn() continue; } - IColumn::Offset next_offset = offset + data_len + 1; + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; data_to.resize(next_offset); - std::copy(data.begin(), data.end(), &data_to[offset]); + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); - data_to[offset + data_len] = 0; + data_to[offset + fuzzed_text.size()] = 0; offsets_to[row_num] = next_offset; offset = next_offset; From ee3c530817d7cf76e27ed61c1b2be532acf3b32c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 15:40:41 +0000 Subject: [PATCH 204/273] Remove obsolete comment --- src/Functions/generateUUIDv7.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index b226c0840f4..b1807a3fd35 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -11,20 +11,6 @@ namespace /* Bit layouts of UUIDv7 -without counter: - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| unix_ts_ms | ver | rand_a | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -|var| rand_b | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -| rand_b | -└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - -with counter: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ From 6aa2f7d5a0997fd56432f4550b3353e5d7cc6898 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 17:02:00 +0100 Subject: [PATCH 205/273] adjust reference file --- .../0_stateless/02982_aggregation_states_destruction.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 From e8701dc4e4b1893dbe507d5180f8367a6d4b7689 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 18:16:26 +0200 Subject: [PATCH 206/273] Fix shutdown in GRPCServer. --- src/Server/GRPCServer.cpp | 102 ++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 43 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 10b59751b22..cb36df1efc0 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1735,10 +1735,19 @@ namespace class GRPCServer::Runner { public: - explicit Runner(GRPCServer & owner_) : owner(owner_) {} + explicit Runner(GRPCServer & owner_) : owner(owner_), log(owner.log) {} ~Runner() { + try + { + stop(); + } + catch (...) + { + tryLogCurrentException(log, "~Runner"); + } + if (queue_thread.joinable()) queue_thread.join(); } @@ -1756,13 +1765,27 @@ public: } catch (...) { - tryLogCurrentException("GRPCServer"); + tryLogCurrentException(log, "run"); } }; queue_thread = ThreadFromGlobalPool{runner_function}; } - void stop() { stopReceivingNewCalls(); } + void stop() + { + std::lock_guard lock{mutex}; + should_stop = true; + + if (current_calls.empty()) + { + /// If there are no current calls then we call shutdownQueue() to signal the queue to stop waiting for next events. + /// The following line will make CompletionQueue::Next() stop waiting if the queue is empty and return false instead. + shutdownQueue(); + + /// If there are some current calls then we can't call shutdownQueue() right now because we want to let the current calls finish. 
+ /// In this case function shutdownQueue() will be called later in run(). + } + } size_t getNumCurrentCalls() const { @@ -1789,12 +1812,6 @@ private: [this, call_type](bool ok) { onNewCall(call_type, ok); }); } - void stopReceivingNewCalls() - { - std::lock_guard lock{mutex}; - should_stop = true; - } - void onNewCall(CallType call_type, bool responder_started_ok) { std::lock_guard lock{mutex}; @@ -1827,38 +1844,47 @@ private: void run() { setThreadName("GRPCServerQueue"); - while (true) + + bool ok = false; + void * tag = nullptr; + + while (owner.queue->Next(&tag, &ok)) { - { - std::lock_guard lock{mutex}; - finished_calls.clear(); /// Destroy finished calls. - - /// If (should_stop == true) we continue processing until there is no active calls. - if (should_stop && current_calls.empty()) - { - bool all_responders_gone = std::all_of( - responders_for_new_calls.begin(), responders_for_new_calls.end(), - [](std::unique_ptr & responder) { return !responder; }); - if (all_responders_gone) - break; - } - } - - bool ok = false; - void * tag = nullptr; - if (!owner.queue->Next(&tag, &ok)) - { - /// Queue shutted down. - break; - } - auto & callback = *static_cast(tag); callback(ok); + + std::lock_guard lock{mutex}; + finished_calls.clear(); /// Destroy finished calls. + + /// If (should_stop == true) we continue processing while there are current calls. + if (should_stop && current_calls.empty()) + shutdownQueue(); } + + /// CompletionQueue::Next() returns false if the queue is fully drained and shut down. + } + + /// Shutdown the queue if that isn't done yet. + void shutdownQueue() + { + chassert(should_stop); + if (queue_is_shut_down) + return; + + queue_is_shut_down = true; + + /// Server should be shut down before CompletionQueue. 
+ if (owner.grpc_server) + owner.grpc_server->Shutdown(); + + if (owner.queue) + owner.queue->Shutdown(); } GRPCServer & owner; + LoggerRawPtr log; ThreadFromGlobalPool queue_thread; + bool queue_is_shut_down = false; std::vector> responders_for_new_calls; std::map> current_calls; std::vector> finished_calls; @@ -1876,16 +1902,6 @@ GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & addr GRPCServer::~GRPCServer() { - /// Server should be shutdown before CompletionQueue. - if (grpc_server) - grpc_server->Shutdown(); - - /// Completion Queue should be shutdown before destroying the runner, - /// because the runner is now probably executing CompletionQueue::Next() on queue_thread - /// which is blocked until an event is available or the queue is shutting down. - if (queue) - queue->Shutdown(); - runner.reset(); } From 8b14754005b52bea1422021aac0ed774f82a7946 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 16:29:47 +0000 Subject: [PATCH 207/273] Fix ARM build (upgrade sysroot) --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index 39c4713334f..cc385041b22 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c +Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 From d0e3a6906015b34290d3ab3fdd0ab67716f55e29 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:41:16 +0200 Subject: [PATCH 208/273] make 03008_deduplication_mv_generates_several_blocks_nonreplicated thinner --- .../0_stateless/03008_deduplication.python | 14 +- ...tes_several_blocks_nonreplicated.reference | 256 +++++++++--------- 2 files changed, 135 insertions(+), 135 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index 89dbea97667..dd1058518c9 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ 
b/tests/queries/0_stateless/03008_deduplication.python @@ -390,14 +390,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != 5 ) FROM table_a_b; - SELECT throwIf( count() != 47 ) + SELECT throwIf( count() != 9 ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {47 if args.deduplicate_dst_table else 94} ) + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) FROM table_when_b_even_and_joined; """ else: @@ -406,14 +406,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {45 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ else: @@ -421,14 +421,14 @@ def test_mv_generates_several_blocks(parser): SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 45} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) FROM table_when_b_even_and_joined; """ assert_second_insert_statements = f""" SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) FROM table_a_b; - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 90} ) + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) FROM table_when_b_even_and_joined; """ @@ -451,7 +451,7 @@ def test_mv_generates_several_blocks(parser): ORDER BY (a_join, b); INSERT 
INTO table_for_join_with SELECT 'joined_' || toString(number), number - FROM numbers(9); + FROM numbers(1); {details_print_for_table_for_join_with} {create_table_a_b_statement} diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference index 76ef4cf6b2c..6e76ec46aa8 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 
table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True s table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 
table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 
table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 
table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 
table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 
table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 
table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 
table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From 5875694669acc70b07ea5422902f6a6549f4f62b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:48:32 +0200 Subject: [PATCH 209/273] Fix includes --- programs/main.cpp | 1 - src/Common/Coverage.cpp | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/programs/main.cpp b/programs/main.cpp index eecbe3a6876..02ea1471108 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,7 +13,6 @@ #include "config.h" #include "config_tools.h" - #include #include #include diff --git a/src/Common/Coverage.cpp b/src/Common/Coverage.cpp index fa8da1f9e15..a21efe62fb6 100644 --- a/src/Common/Coverage.cpp +++ b/src/Common/Coverage.cpp @@ -1,6 +1,26 @@ #include #if defined(SANITIZE_COVERAGE) + +#include +#include + +#include +#include + +#include +#include + +#include + +/// Macros to avoid using strlen(), since it may fail if SSE is not supported. 
+#define writeError(data) do \ + { \ + static_assert(__builtin_constant_p(data)); \ + if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \ + _Exit(1); \ + } while (false) + __attribute__((no_sanitize("coverage"))) void dumpCoverage() { From 045cb0a5819e6e41198cd9cb05e27fb04aa08269 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 12:52:25 +0200 Subject: [PATCH 210/273] Increase special allocation sampling --- src/Common/GWPAsan.cpp | 2 +- src/Core/ServerSettings.h | 2 +- src/IO/BufferWithOwnMemory.h | 15 +++++++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 0482ddb4e2b..48fbd07ec34 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,7 +57,7 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - opts.SampleRate = 8000; + opts.SampleRate = 10000; const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 68ac45fa24f..e70be61118a 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.001, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 5c9a69893df..0ec733f7840 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -4,12 +4,15 @@ #include #include +#include #include #include #include +#include "config.h" + namespace ProfileEvents { @@ -41,10 +44,13 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; + [[maybe_unused]] bool allow_gwp_asan_force_sample; + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. - explicit Memory(size_t size_, size_t alignment_ = 0) : alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0, bool allow_gwp_asan_force_sample_ = false) + : alignment(alignment_), allow_gwp_asan_force_sample(allow_gwp_asan_force_sample_) { alloc(size_); } @@ -127,6 +133,11 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, new_capacity); +#if USE_GWP_ASAN + if (unlikely(allow_gwp_asan_force_sample && GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = new_size; @@ -154,7 +165,7 @@ protected: public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. 
explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) + : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment, /*allow_gwp_asan_force_sample_=*/true) { Base::set(existing_memory ? existing_memory : memory.data(), size); Base::padded = !existing_memory; From 438fd899236b15468828c3dec751081fd07325d6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:59:07 +0200 Subject: [PATCH 211/273] adjust 03008_deduplication_mv_generates_several_blocks_replicated --- ...erates_several_blocks_replicated.reference | 256 +++++++++--------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference index a84539df16b..a25e8713c61 100644 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -3,13 +3,13 @@ Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -18,13 +18,13 @@ Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -33,13 +33,13 @@ Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -48,13 +48,13 @@ Test case 3: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -63,13 +63,13 @@ Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -78,13 +78,13 @@ Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -93,13 +93,13 @@ Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -108,13 +108,13 @@ Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -123,13 +123,13 @@ Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -138,13 +138,13 @@ Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_to table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -153,13 +153,13 @@ Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -168,13 +168,13 @@ Test case 11: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -183,13 +183,13 @@ Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -198,13 +198,13 @@ Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -213,13 +213,13 @@ Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -228,13 +228,13 @@ Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -243,13 +243,13 @@ Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -258,13 +258,13 @@ Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -273,13 +273,13 @@ Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -288,13 +288,13 @@ Test case 19: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -303,13 +303,13 @@ Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -318,13 +318,13 @@ Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -333,13 +333,13 @@ Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -348,13 +348,13 @@ Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -363,13 +363,13 @@ Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -378,13 +378,13 @@ Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -393,13 +393,13 @@ Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -408,13 +408,13 @@ Test case 27: insert_method=InsertSelect 
engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -423,13 +423,13 @@ Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -438,13 +438,13 @@ Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -453,13 +453,13 @@ Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -468,13 +468,13 @@ Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -483,13 +483,13 @@ Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -498,13 +498,13 @@ Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -513,13 +513,13 @@ Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -528,13 +528,13 @@ Test case 35: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -543,13 +543,13 @@ Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -558,13 +558,13 @@ Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -573,13 +573,13 @@ Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -588,13 +588,13 @@ Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -603,13 +603,13 @@ Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -618,13 +618,13 @@ Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -633,13 +633,13 @@ Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -648,13 +648,13 @@ Test case 43: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 5 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -663,13 +663,13 @@ Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -678,13 +678,13 @@ Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 45 +count 10 0 0 OK @@ -693,13 +693,13 @@ Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -708,13 +708,13 @@ Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -723,13 +723,13 @@ Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -738,13 +738,13 @@ Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -753,13 +753,13 @@ Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -768,13 +768,13 @@ Test case 51: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -783,13 +783,13 @@ Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -798,13 +798,13 @@ Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -813,13 +813,13 @@ Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -828,13 +828,13 @@ Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -843,13 +843,13 @@ Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -858,13 +858,13 @@ Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 1 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -873,13 +873,13 @@ Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 5 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -888,13 +888,13 @@ Test case 59: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_t table_a_b count 1 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 1 table_when_b_even_and_joined -count 90 +count 20 0 0 OK @@ -903,13 +903,13 @@ Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 47 +count 9 0 0 OK @@ -918,13 +918,13 @@ Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 9 +count 2 0 0 table_a_b count 10 table_when_b_even_and_joined -count 9 +count 2 0 0 OK @@ -933,13 +933,13 @@ Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 47 +count 9 0 0 table_a_b count 10 table_when_b_even_and_joined -count 94 +count 18 0 0 OK @@ -948,13 +948,13 @@ Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_t table_a_b count 5 table_when_b_even_and_joined -count 45 +count 10 0 0 table_a_b count 10 table_when_b_even_and_joined -count 90 +count 20 0 0 OK From d688d4114c6ac915aeb584587b985006a39c9f15 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 3 Jul 2024 18:16:45 +0100 Subject: [PATCH 212/273] Rename events --- src/Common/ProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 2e3984f8f10..acd29a91450 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -239,8 +239,8 @@ M(CannotRemoveEphemeralNode, "Number of times an error happened while trying to remove ephemeral node. 
This is not an issue, because our implementation of ZooKeeper library guarantee that the session will expire and the node will be removed.") \ \ M(RegexpWithMultipleNeedlesCreated, "Regular expressions with multiple needles (VectorScan library) compiled.") \ - M(RegexpWithMultipleNeedlesCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ - M(RegexpWithMultipleNeedlesCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheHit, "Number of times we fetched compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ + M(RegexpWithMultipleNeedlesGlobalCacheMiss, "Number of times we failed to fetch compiled regular expression with multiple needles (VectorScan library) from the global cache.") \ M(RegexpLocalCacheHit, "Number of times we fetched compiled regular expression from a local cache.") \ M(RegexpLocalCacheMiss, "Number of times we failed to fetch compiled regular expression from a local cache.") \ \ From 2ec0a9cfeaf850c31f1567da1884ad77fae0bcdd Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 19:25:44 +0200 Subject: [PATCH 213/273] Fix test test_grpc_protocol/test.py::test_progress --- tests/integration/test_grpc_protocol/test.py | 60 ++++++++------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1e4ae7f0288 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -369,47 +369,33 @@ def test_progress(): "SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True, ) - results = list(results) - for result in results: - result.time_zone = "" - result.query_id = 
"" - # print(results) - # Note: We can't convert those messages to string like `results = str(results)` and then compare it as a string - # because str() can serialize a protobuf message with any order of fields. - expected_results = [ - clickhouse_grpc_pb2.Result( - output_format="TabSeparated", - progress=clickhouse_grpc_pb2.Progress( - read_rows=2, read_bytes=16, total_rows_to_read=8 - ), - ), - clickhouse_grpc_pb2.Result(output=b"0\t0\n1\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"2\t0\n3\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"4\t0\n5\t0\n"), - clickhouse_grpc_pb2.Result( - progress=clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16) - ), - clickhouse_grpc_pb2.Result(output=b"6\t0\n7\t0\n"), - clickhouse_grpc_pb2.Result( - stats=clickhouse_grpc_pb2.Stats( - rows=8, - blocks=4, - allocated_bytes=1092, - ) - ), + # Note: We can't compare results using a statement like `assert results == expected_results` + # because `results` can come in slightly different order. + # So we compare `outputs` and `progresses` separately and not `results` as a whole. 
+ + outputs = [i.output for i in results if i.output] + progresses = [i.progress for i in results if i.HasField("progress")] + + # print(outputs) + # print(progresses) + + expected_outputs = [ + b"0\t0\n1\t0\n", + b"2\t0\n3\t0\n", + b"4\t0\n5\t0\n", + b"6\t0\n7\t0\n", ] - # Stats data can be returned, which broke the test - results = [i for i in results if not isinstance(i, clickhouse_grpc_pb2.Stats)] + expected_progresses = [ + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16, total_rows_to_read=8), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + clickhouse_grpc_pb2.Progress(read_rows=2, read_bytes=16), + ] - assert results == expected_results + assert outputs == expected_outputs + assert progresses == expected_progresses def test_session_settings(): From 9737c5bab4779708e6a51bbb6739d8da34fc87bd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 17:33:45 +0000 Subject: [PATCH 214/273] Probably fix tsan assert in test_mysql_killed_while_insert_8_0 --- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index 5d81fa7068f..ee2bb8513b2 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c +Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 From 87dda31a2c234a7596b661ad5d63f74f3124ea25 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 3 Jul 2024 20:36:19 +0200 Subject: [PATCH 215/273] Add test for GRPCServer's shutdown. 
--- tests/integration/test_grpc_protocol/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 851da99acf3..1ace5b361b8 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -39,6 +39,7 @@ node = cluster.add_instance( "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, ipv6_address=IPV6_ADDRESS, + stay_alive=True, ) main_channel = None @@ -763,3 +764,9 @@ def test_opentelemetry_context_propagation(): ) == "SELECT 1\tsome custom state\n" ) + + +def test_restart(): + assert query("SELECT 1") == "1\n" + node.restart_clickhouse() + assert query("SELECT 2") == "2\n" From 877445c88d71caffc79e6e7cd956298e84e9867e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 19:21:12 +0000 Subject: [PATCH 216/273] Fix reading dynamic subcolumns from altered Memory table --- src/Interpreters/getColumnFromBlock.cpp | 30 +++++++++++++++++++ src/Interpreters/getColumnFromBlock.h | 1 + .../QueryPlan/ReadFromMemoryStorageStep.cpp | 20 +++++++++---- ...3200_memory_engine_alter_dynamic.reference | 10 +++++++ .../03200_memory_engine_alter_dynamic.sql | 7 +++++ 5 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference create mode 100644 tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair 
& requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If requested subcolumn is dynamic, we should first perform cast and then + /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. + if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 
100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { 
return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..95823283812 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=1; + From 1b6ef06a91cf31b2aa8dbe5e3494ec63d602e4c9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 21:14:28 +0100 Subject: [PATCH 217/273] review fixes --- src/Common/CgroupsMemoryUsageObserver.cpp | 26 ++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 33393a8b9c6..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -1,5 +1,3 @@ -#include -#include #include #if defined(OS_LINUX) @@ -14,7 
+12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -59,9 +59,9 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke } assertChar(' ', buf); - uint64_t mem_usage = 0; - readIntText(mem_usage, buf); - return mem_usage; + uint64_t value = 0; + readIntText(value, buf); + return value; } throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); @@ -96,10 +96,12 @@ struct CgroupsV2Reader : ICgroupsReader current_buf.rewind(); stat_buf.rewind(); - uint64_t mem_usage = 0; + int64_t mem_usage = 0; /// memory.current contains a single number + /// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 readIntText(mem_usage, current_buf); mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); + chassert(mem_usage >= 0, "Negative memory usage"); return mem_usage; } @@ -153,13 +155,13 @@ std::optional getCgroupsV1Path() std::pair getCgroupsPath() { - auto v2_file_name = getCgroupsV2Path(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - auto v1_file_name = getCgroupsV1Path(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); } From 5fb0fa3c3d3e611944f83ac06aaac2d6e5c0d0db Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Jul 2024 20:21:37 +0000 Subject: [PATCH 218/273] Automatic style fix --- tests/integration/test_memory_limit_observer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py index 2840c830396..0eda165b1d2 100644 --- a/tests/integration/test_memory_limit_observer/test.py +++ b/tests/integration/test_memory_limit_observer/test.py @@ -76,4 +76,4 @@ def test_memory_usage_doesnt_include_page_cache_size(started_cluster): WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%' """ ).strip() - assert int(max_mem_usage_from_cgroup) < 2 * 2 ** 30 + assert int(max_mem_usage_from_cgroup) < 2 * 2**30 From eb7ab5128d009fe89ef1994e2f32ea10bea900b1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Jul 2024 21:07:59 +0000 Subject: [PATCH 219/273] Clean-up custom LLVM 15 patches --- contrib/llvm-project | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/llvm-project b/contrib/llvm-project index d2142eed980..2a8967b60cb 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f +Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc From fe42d2ffe49addd28716585a4e2f3b8a53a3d6b0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Jul 2024 08:10:40 +0100 Subject: [PATCH 220/273] Lower sampling rate --- src/Core/ServerSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index e70be61118a..6c62ab6def8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, 
gwp_asan_force_sample_probability, 0.001, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0005, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp From e38e105e20ae6406a60baa0a08beed518676b346 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 16:44:20 +0800 Subject: [PATCH 221/273] add window function percent_rank --- .../sql-reference/window-functions/index.md | 1 + src/Processors/Transforms/WindowTransform.cpp | 171 ++++++++++++++++-- .../01592_window_functions.reference | 12 ++ .../0_stateless/01592_window_functions.sql | 18 ++ 4 files changed, 184 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 49076f3cbe1..8097abc0b15 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficent| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index b9f61d30182..0c7caca9de5 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -16,6 +16,10 @@ #include #include #include +#include "WindowTransform.h" + +#include +#include #include @@ -1609,8 +1613,37 @@ struct WindowFunctionHelpers { recurrent_detail::setValueToOutputColumn(transform, function_index, value); } + + ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; } + + ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) + { + /// when partition_ended is false, it means that we don't reach the last row in this partition. + /// But when partition_ended is true, it doesn't mean that we reach the last row in this partition. + /// partition_ended is true when + /// - the input has finished. or + /// - current block contains next partition's data. + /// This is for fast check. + if (!transform->partition_ended) + return false; + + auto current_row = transform->current_row; + current_row.row++; + const auto & partitoin_end_row = transform->partition_end; + /// If current_row == partitoin_end_row, return true. otherwise + if (current_row != partitoin_end_row) + { + if (current_row.row < transform->blockRowsNumber(current_row)) + return false; + /// Next row to current_row may belong to next block. 
+ if (partitoin_end_row.block != current_row.block + 1 || partitoin_end_row.row) + return false; + } + return true; + } }; + template struct StatefulWindowFunction : public WindowFunction { @@ -1639,6 +1672,8 @@ struct StatefulWindowFunction : public WindowFunction { return *reinterpret_cast(workspace.aggregate_function_state.data()); } + + }; struct ExponentialTimeDecayedSumState @@ -2128,7 +2163,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2172,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row. 
- } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2211,6 +2230,115 @@ namespace } } +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::ROWS; + frame.end_type = WindowFrame::BoundaryType::Unbounded; + return frame; + } + + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override + { + checkFrameBoundType(transform); + + auto & state = getWorkspaceState(transform, function_index); + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) + { + state.current_partition_rows = 0; + state.start_row = transform->current_row; + } + + insertRankIntoColumn(transform, function_index); + state.current_partition_rows++; + + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) + { + return; + } + + UInt64 remaining_rows = state.current_partition_rows; + Float64 percent_rank_denominator = state.current_partition_rows - 1; + + if (remaining_rows <= 1) + return; + while(remaining_rows > 0) + { + auto block_rows_number = transform->blockRowsNumber(state.start_row); + auto available_block_rows = block_rows_number - state.start_row.row; + if (available_block_rows <= remaining_rows) + { + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row; i < block_rows_number; ++i) + data[i] = 
data[i] / percent_rank_denominator; + + state.start_row.block++; + state.start_row.row = 0; + remaining_rows -= available_block_rows; + } + else + { + auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; + auto & data = assert_cast(to_column).getData(); + for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) + { + data[i] = data[i]/percent_rank_denominator; + } + state.start_row.row += remaining_rows; + remaining_rows = 0; + } + } + } + + + inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const + { + const auto & workspace = transform->workspaces[function_index]; + return getState(workspace); + } + + inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const + { + auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); + } +private: + mutable bool has_check_frame_bound_type = false; + ALWAYS_INLINE void checkFrameBoundType(const WindowTransform * transform) const + { + if (has_check_frame_bound_type) + return; + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + } + has_check_frame_bound_type = true; + } +}; + // ClickHouse-specific variant of lag/lead that respects the window frame. 
template struct WindowFunctionLagLeadInFrame final : public WindowFunction @@ -2582,6 +2710,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index ec957dd7a02..0995def71e2 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -79,3 +79,15 @@ iPhone 900 Smartphone 500 500 Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 +---- Q8 ---- +Lenovo Thinkpad Laptop 700 0 +Sony VAIO Laptop 700 0.3333333333333333 +Dell Vostro Laptop 800 0.6666666666666666 +HP Elite Laptop 1200 1 +Microsoft Lumia Smartphone 200 0 +HTC One Smartphone 400 0.3333333333333333 +Nexus Smartphone 500 0.6666666666666666 +iPhone Smartphone 900 1 +Kindle Fire Tablet 150 0 +Samsung Galaxy Tab Tablet 200 0.5 +iPad Tablet 700 1 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index f0d173b1f20..e48e26b26d2 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -101,5 +101,23 @@ SELECT FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; +select '---- Q8 ----'; +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + 
percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +ORDER BY + group_name ASC, + r ASC, + product_name ASC, + price ASC; + drop table product_groups; drop table products; From 6e231eedcf04e4136ea56fcbbb4a43916241dd23 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 17:16:20 +0800 Subject: [PATCH 222/273] fixed style --- src/Processors/Transforms/WindowTransform.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 0c7caca9de5..4758d5ca7f4 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -16,10 +16,6 @@ #include #include #include -#include "WindowTransform.h" - -#include -#include #include @@ -1643,7 +1639,6 @@ struct WindowFunctionHelpers } }; - template struct StatefulWindowFunction : public WindowFunction { @@ -1672,8 +1667,6 @@ struct StatefulWindowFunction : public WindowFunction { return *reinterpret_cast(workspace.aggregate_function_state.data()); } - - }; struct ExponentialTimeDecayedSumState @@ -2281,7 +2274,7 @@ public: if (remaining_rows <= 1) return; - while(remaining_rows > 0) + while (remaining_rows > 0) { auto block_rows_number = transform->blockRowsNumber(state.start_row); auto available_block_rows = block_rows_number - state.start_row.row; From 37d2ced74cd173c44015a89fcb9522ef9c3979ee Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 18 Apr 2024 17:35:43 +0800 Subject: [PATCH 223/273] fixed typos --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 8097abc0b15..814a7ac4aca 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,7 
+23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficent| +| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficient| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | From 13d5b336adb8021742f842bff457ee8b3267b743 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 09:16:31 +0800 Subject: [PATCH 224/273] check window frame --- src/Processors/Transforms/WindowTransform.cpp | 91 +++++++++++-------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 4758d5ca7f4..45c9f4457b8 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include @@ -71,6 +74,9 @@ public: size_t function_index) const = 0; virtual std::optional getDefaultFrame() const { return {}; } + + /// Is the frame type supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } }; // Compares ORDER BY column values at given rows to find the boundaries of frame: @@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_, } } } + + for (const auto & workspace : workspaces) + { + if (workspace.window_function_impl) + { + if (!workspace.window_function_impl->checkWindowFrameType(this)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'", + workspace.aggregate_function->getName()); + } + } + + } } WindowTransform::~WindowTransform() @@ -2086,8 +2105,6 @@ namespace const WindowTransform * transform, size_t function_index, const DataTypes & argument_types); - - static void checkWindowFrameType(const WindowTransform * transform); }; } @@ -2107,6 +2124,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction } bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if 
(transform->order_by_indices.empty()) + { + LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause"); + return false; + } + + // We must wait all for the partition end and get the total rows number in this + // partition. So before the end of this partition, there is no any block could be + // dropped out. + bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } std::optional getDefaultFrame() const override { @@ -2134,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2205,22 +2244,6 @@ namespace bucket_num += 1; } } - - void NtileState::checkWindowFrameType(const WindowTransform * transform) - { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); - - // We must wait all for the partition end and get the total rows number in this - // partition. So before the end of this partition, there is no any block could be - // dropped out. 
- bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded - && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; - if (!is_frame_supported) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); - } - } } namespace @@ -2249,11 +2272,22 @@ public: frame.end_type = WindowFrame::BoundaryType::Unbounded; return frame; } + + bool checkWindowFrameType(const WindowTransform * transform) const override + { + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + { + LOG_ERROR(getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override { - checkFrameBoundType(transform); - auto & state = getWorkspaceState(transform, function_index); if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) { @@ -2315,21 +2349,6 @@ public: auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); } -private: - mutable bool has_check_frame_bound_type = false; - ALWAYS_INLINE void checkFrameBoundType(const WindowTransform * transform) const - { - if (has_check_frame_bound_type) - return; - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded - || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING'"); - } - has_check_frame_bound_type = true; - } }; // ClickHouse-specific variant of lag/lead that respects the window frame. From 04e7b11a6477ed8b554a12ca301d00ba01e0525d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 10:33:44 +0800 Subject: [PATCH 225/273] fixed style --- src/Processors/Transforms/WindowTransform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 45c9f4457b8..1cb447bb6d3 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2124,7 +2124,7 @@ struct WindowFunctionNtile final : public StatefulWindowFunction } bool allocatesMemoryInArena() const override { return false; } - + bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->order_by_indices.empty()) @@ -2272,7 +2272,7 @@ public: frame.end_type = WindowFrame::BoundaryType::Unbounded; return frame; } - + bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded From 7f706dd9d1e7bf0b982c9db86f73c1cc89a4a0a5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:40:58 +0800 Subject: [PATCH 226/273] fixed --- src/Processors/Transforms/WindowTransform.cpp | 11 +--------- .../01592_window_functions.reference | 22 +++++++++---------- .../0_stateless/01592_window_functions.sql | 8 +++---- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1cb447bb6d3..ce188ed47ae 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2265,18 +2265,9 @@ public: bool allocatesMemoryInArena() const override { return false; } - std::optional getDefaultFrame() 
const override - { - WindowFrame frame; - frame.type = WindowFrame::FrameType::ROWS; - frame.end_type = WindowFrame::BoundaryType::Unbounded; - return frame; - } - bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded - || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Unbounded) + if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) { LOG_ERROR(getLogger("WindowFunctionPercentRank"), "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index 0995def71e2..f88360abcc1 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -80,14 +80,14 @@ Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 ---- Q8 ---- -Lenovo Thinkpad Laptop 700 0 -Sony VAIO Laptop 700 0.3333333333333333 -Dell Vostro Laptop 800 0.6666666666666666 -HP Elite Laptop 1200 1 -Microsoft Lumia Smartphone 200 0 -HTC One Smartphone 400 0.3333333333333333 -Nexus Smartphone 500 0.6666666666666666 -iPhone Smartphone 900 1 -Kindle Fire Tablet 150 0 -Samsung Galaxy Tab Tablet 200 0.5 -iPad Tablet 700 1 +Lenovo Thinkpad Laptop 700 1 0 +Sony VAIO Laptop 700 1 0 +Dell Vostro Laptop 800 3 0.6666666666666666 +HP Elite Laptop 1200 4 1 +Microsoft Lumia Smartphone 200 1 0 +HTC One Smartphone 400 2 0.3333333333333333 +Nexus Smartphone 500 3 0.6666666666666666 +iPhone Smartphone 900 4 1 +Kindle Fire Tablet 150 1 0 +Samsung Galaxy Tab Tablet 200 2 0.5 +iPad Tablet 700 3 1 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index e48e26b26d2..f4b868c36e4 100644 
--- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -109,15 +109,15 @@ FROM product_name, group_name, price, - percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS r + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent FROM products INNER JOIN product_groups USING (group_id) ) AS t ORDER BY group_name ASC, - r ASC, - product_name ASC, - price ASC; + price ASC, + product_name ASC; drop table product_groups; drop table products; From 656a9a7260e3789b8fb671b788dbb3126d88ebe9 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:42:24 +0800 Subject: [PATCH 227/273] update --- tests/queries/0_stateless/01592_window_functions.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index f4b868c36e4..2fc0e55bf02 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -109,14 +109,14 @@ FROM product_name, group_name, price, - rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent FROM products INNER JOIN product_groups USING (group_id) ) AS t ORDER BY group_name ASC, - price ASC, + price ASC, product_name ASC; drop table product_groups; From 91d2e5c72b38bb607d5a75020d38230a6937f310 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 19 Apr 2024 14:48:42 +0800 Subject: [PATCH 228/273] more corver case --- tests/queries/0_stateless/01592_window_functions.reference | 1 + tests/queries/0_stateless/01592_window_functions.sql | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01592_window_functions.reference 
b/tests/queries/0_stateless/01592_window_functions.reference index f88360abcc1..06ec67ee82d 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -91,3 +91,4 @@ iPhone Smartphone 900 4 1 Kindle Fire Tablet 150 1 0 Samsung Galaxy Tab Tablet 200 2 0.5 iPad Tablet 700 3 1 +Others Unknow 200 1 0 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index 2fc0e55bf02..a660fcca7b2 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -102,6 +102,9 @@ FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; select '---- Q8 ----'; +INSERT INTO product_groups VALUES (4, 'Unknow'); +INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200); + SELECT * FROM ( From e52828abf91c2e407fbdf5371e5c794a31b86b1e Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 13 May 2024 11:19:33 +0800 Subject: [PATCH 229/273] fixed typos --- src/Processors/Transforms/WindowTransform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index ce188ed47ae..ad592613da2 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1644,14 +1644,14 @@ struct WindowFunctionHelpers auto current_row = transform->current_row; current_row.row++; - const auto & partitoin_end_row = transform->partition_end; + const auto & partition_end_row = transform->partition_end; /// If current_row == partitoin_end_row, return true. otherwise - if (current_row != partitoin_end_row) + if (current_row != partition_end_row) { if (current_row.row < transform->blockRowsNumber(current_row)) return false; /// Next row to current_row may belong to next block. 
- if (partitoin_end_row.block != current_row.block + 1 || partitoin_end_row.row) + if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) return false; } return true; From fa234cadcbd287145ce93211dd239fdf034d9335 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 14 May 2024 11:47:50 +0800 Subject: [PATCH 230/273] update doc --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 814a7ac4aca..16225d4b0e2 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,7 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `percent_rank()` | ✅ equal to `ifNull((rank() OVER(PARTITION BY x order by y) - 1) / nullif(count(1) OVER(PARTITION BY x) -1, 0), 0)`, but more efficient| +| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | From 297b65dbbe1859bac7c237d644452b03e3e5849d Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 15 May 2024 17:16:44 +0800 Subject: [PATCH 231/273] fixed --- src/Processors/Transforms/WindowTransform.cpp | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index ad592613da2..517e202556b 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2267,15 +2267,25 @@ public: bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) - { - LOG_ERROR(getLogger("WindowFunctionPercentRank"), - "Window frame for function 'percent_rank' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); - return false; + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); + return false; } return true; } + std::optional getDefaultFrame() const override + { + WindowFrame frame; + frame.type = WindowFrame::FrameType::RANGE; + frame.begin_type = WindowFrame::BoundaryType::Unbounded; + frame.end_type = WindowFrame::BoundaryType::Current; + return frame; + } void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override { From b6782d4b2d98e72002b691a3a421d689831fc1bf Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 24 May 2024 09:39:20 +0800 Subject: [PATCH 232/273] update --- src/Processors/Transforms/WindowTransform.cpp | 28 ++++++++++--------- 1 file 
changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 517e202556b..729fef5c05d 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1633,24 +1633,24 @@ struct WindowFunctionHelpers ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform) { - /// when partition_ended is false, it means that we don't reach the last row in this partition. - /// But when partition_ended is true, it doesn't mean that we reach the last row in this partition. - /// partition_ended is true when - /// - the input has finished. or - /// - current block contains next partition's data. /// This is for fast check. if (!transform->partition_ended) return false; auto current_row = transform->current_row; + /// checkPartitionEnterLastRow is called on each row, also move on current_row.row here. current_row.row++; const auto & partition_end_row = transform->partition_end; - /// If current_row == partitoin_end_row, return true. otherwise + + /// The partition end is reached, when following is true + /// - current row is the partition end row, + /// - or current row is the last row of all input. if (current_row != partition_end_row) { + /// when current row is not the partition end row, we need to check whether it's the last + /// input row. if (current_row.row < transform->blockRowsNumber(current_row)) return false; - /// Next row to current_row may belong to next block. 
if (partition_end_row.block != current_row.block + 1 || partition_end_row.row) return false; } @@ -2268,7 +2268,8 @@ public: bool checkWindowFrameType(const WindowTransform * transform) const override { if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE - || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded) + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) { LOG_ERROR( getLogger("WindowFunctionPercentRank"), @@ -2305,20 +2306,20 @@ public: } UInt64 remaining_rows = state.current_partition_rows; - Float64 percent_rank_denominator = state.current_partition_rows - 1; + Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1; - if (remaining_rows <= 1) - return; while (remaining_rows > 0) { auto block_rows_number = transform->blockRowsNumber(state.start_row); auto available_block_rows = block_rows_number - state.start_row.row; if (available_block_rows <= remaining_rows) { + /// This partition involves multiple blocks. Finish current block and move on to the + /// next block. 
auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; auto & data = assert_cast(to_column).getData(); for (size_t i = state.start_row.row; i < block_rows_number; ++i) - data[i] = data[i] / percent_rank_denominator; + data[i] = (data[i] - 1) / percent_rank_denominator; state.start_row.block++; state.start_row.row = 0; @@ -2326,11 +2327,12 @@ public: } else { + /// The partition ends in current block.s auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index]; auto & data = assert_cast(to_column).getData(); for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i) { - data[i] = data[i]/percent_rank_denominator; + data[i] = (data[i] - 1) / percent_rank_denominator; } state.start_row.row += remaining_rows; remaining_rows = 0; From 87978327d6bab9509c0aef945dfbf76b4437e300 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 27 May 2024 09:00:17 +0800 Subject: [PATCH 233/273] fixed --- src/Processors/Transforms/WindowTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 729fef5c05d..a694fa43e46 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2350,7 +2350,7 @@ public: inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const { auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index]; - assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number) - 1); + assert_cast(to_column).getData().push_back(static_cast(transform->peer_group_start_row_number)); } }; From bfc755c000cb016f88b51a9526fbb32b375ccbe4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jul 2024 11:44:24 +0200 Subject: [PATCH 234/273] Fix shutdown --- .../ObjectStorageQueueSource.cpp | 55 +++++++++---------- 
.../ObjectStorageQueueSource.h | 1 - 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index dc5fb6d2744..f43796fc8b3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -424,14 +424,14 @@ Chunk ObjectStorageQueueSource::generateImpl() { while (true) { - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); - break; - } - if (!reader) { + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + break; + } + const auto context = getContext(); reader = StorageObjectStorageSource::createReader( processor_id, file_iterator, configuration, object_storage, read_from_format_info, @@ -448,28 +448,6 @@ Chunk ObjectStorageQueueSource::generateImpl() const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); - - if (isCancelled()) - { - reader->cancel(); - - if (processed_rows_from_file) - { - try - { - file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); - } - catch (...) - { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - } - - LOG_TEST(log, "Query is cancelled"); - break; - } - const auto & path = reader.getObjectInfo()->getPath(); if (shutdown_called) @@ -504,6 +482,27 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } + if (isCancelled()) + { + reader->cancel(); + + if (processed_rows_from_file) + { + try + { + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) 
+ { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + } + + LOG_TEST(log, "Query is cancelled"); + break; + } + auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index fce2a426ecb..0f3d0ab2e92 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -21,7 +21,6 @@ class ObjectStorageQueueSource : public ISource, WithContext public: using Storage = StorageObjectStorage; using Source = StorageObjectStorageSource; - using RemoveFileFunc = std::function; using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; From 7ae85fda3f0279ceb16a45ee8babdbb0cd40f57d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jul 2024 11:46:17 +0200 Subject: [PATCH 235/273] Restore previous order --- .../ObjectStorageQueueSource.cpp | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 4f6f6a0e97a..4d921003e04 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -452,6 +452,27 @@ Chunk ObjectStorageQueueSource::generateImpl() auto file_status = file_metadata->getFileStatus(); const auto & path = reader.getObjectInfo()->getPath(); + if (isCancelled()) + { + reader->cancel(); + + if 
(processed_rows_from_file) + { + try + { + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) + { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + } + + LOG_TEST(log, "Query is cancelled"); + break; + } + if (shutdown_called) { LOG_TEST(log, "Shutdown called"); @@ -484,27 +505,6 @@ Chunk ObjectStorageQueueSource::generateImpl() path, processed_rows_from_file); } - if (isCancelled()) - { - reader->cancel(); - - if (processed_rows_from_file) - { - try - { - file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); - } - catch (...) - { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - } - - LOG_TEST(log, "Query is cancelled"); - break; - } - try { auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); From 33b7afc1b45e0a493f4139dae14025e4e3613c29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 09:02:33 +0000 Subject: [PATCH 236/273] Bump vectorscan to 5.4.11 --- contrib/icu-cmake/CMakeLists.txt | 2 +- contrib/vectorscan | 2 +- contrib/vectorscan-cmake/CMakeLists.txt | 27 +++++--------------- contrib/vectorscan-cmake/common/hs_version.h | 6 ++++- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/vectorscan b/contrib/vectorscan index 4918f81ea3d..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 
+Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. 
This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. */ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ From 4543ae3d6490a660c4df8b15bde0345735a540e0 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:51:22 +0200 Subject: [PATCH 237/273] Update test --- tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql index 95823283812..a01a595dbb5 100644 --- a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -3,5 +3,5 @@ create table test (d Dynamic) engine=Memory; insert into table test select * from numbers(5); alter table test modify column d Dynamic(max_types=1); select d.UInt64 from test settings allow_experimental_analyzer=1; -select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from 
test settings allow_experimental_analyzer=0; From 6ea4c101214edf678743833856e09f717f672c67 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 27 Jun 2024 17:19:52 +0200 Subject: [PATCH 238/273] Done --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. 
It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ From 6f89c4b9328b587d2342ca172da4eaebe0611a1c Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 28 Jun 2024 21:09:36 +0000 Subject: [PATCH 239/273] Bump the minimal version to keep compatibility --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. 
# This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file From d62454714b1e8ea760c6d6baa8df4ec185b8750c Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 1 Jul 2024 17:52:35 +0200 Subject: [PATCH 240/273] Make test error visible --- tests/ci/integration_tests_runner.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 87f721cfde7..7802dfa3c52 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner: "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd ) with open(out_file_full, "wb") as ofd: - subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + try: + subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) + except subprocess.CalledProcessError as ex: + print("ERROR: Setting test plan failed. Output:") + with open(out_file_full, 'r') as file: + for line in file: + print(" " + line, end='') + raise ex all_tests = set() with open(out_file_full, "r", encoding="utf-8") as all_tests_fd: From e2553454ecbe890cecf90995565d805287ca0703 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 1 Jul 2024 16:02:06 +0000 Subject: [PATCH 241/273] Automatic style fix --- tests/ci/integration_tests_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 7802dfa3c52..a1c33cf22d9 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,9 +438,9 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. 
Output:") - with open(out_file_full, 'r') as file: + with open(out_file_full, "r") as file: for line in file: - print(" " + line, end='') + print(" " + line, end="") raise ex all_tests = set() From 782115efea35a60cd9627faf22c1684ad54a551d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 18:06:47 +0200 Subject: [PATCH 242/273] Very bad change --- tests/integration/test_distributed_inter_server_secret/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..3e656c9d776 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -28,7 +28,7 @@ def make_instance(name, *args, **kwargs): # DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" +assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user From 2c37cc048c16a90d972c0f1c2b9c41727174cff3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 22:11:44 +0200 Subject: [PATCH 243/273] Style --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index a1c33cf22d9..21f16d995a4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -438,7 +438,7 @@ class ClickhouseIntegrationTestsRunner: subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd) except subprocess.CalledProcessError as ex: print("ERROR: Setting test plan failed. 
Output:") - with open(out_file_full, "r") as file: + with open(out_file_full, "r", encoding="utf-8") as file: for line in file: print(" " + line, end="") raise ex From 6fcd0eed06af3a02d1e26f182eb7f2bbf16d6471 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 11:40:04 +0000 Subject: [PATCH 244/273] Remove tests which are no longer relevant --- .../test.py | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 3e656c9d776..457590ac851 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -26,10 +26,6 @@ def make_instance(name, *args, **kwargs): **kwargs, ) - -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION <= "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +34,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +415,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def 
test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From 910065e42745e3a1299a596462d6197e4c36a50f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 11:48:28 +0000 Subject: [PATCH 245/273] Automatic style fix --- tests/integration/test_distributed_inter_server_secret/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 457590ac851..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -26,6 +26,7 @@ def make_instance(name, *args, **kwargs): **kwargs, ) + # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( From dd3eb538f6a0788365ff62e15ab167b28f3d76a1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Jul 2024 22:51:21 +0000 Subject: [PATCH 246/273] Better --- src/Client/HedgedConnections.cpp | 6 ++ src/Client/MultiplexedConnections.cpp | 6 ++ src/Server/TCPHandler.cpp | 7 ++ 
.../test_analyzer_compatibility/__init__.py | 0 .../configs/remote_servers.xml | 17 ++++ .../test_analyzer_compatibility/test.py | 79 +++++++++++++++++++ 6 files changed, 115 insertions(+) create mode 100644 tests/integration/test_analyzer_compatibility/__init__.py create mode 100644 tests/integration/test_analyzer_compatibility/configs/remote_servers.xml create mode 100644 tests/integration/test_analyzer_compatibility/test.py diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..51cbe6f3d6f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -195,6 +195,12 @@ void HedgedConnections::sendQuery( modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..99bdd706d8b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery( } } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. 
+ /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a522a3f8782..cfb41be0c27 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1890,6 +1890,13 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer will become obsolete. + /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there + /// So we just force ourselves to act in the same way. 
+ if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) diff --git a/tests/integration/test_analyzer_compatibility/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..0a50dab7fd3 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + current + 9000 + + + backward + 9000 + + + + + diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py new file mode 100644 index 00000000000..0ba7f248606 --- /dev/null +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -0,0 +1,79 @@ +import uuid + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2" + +cluster = ClickHouseCluster(__file__) +# Here analyzer is enabled by default +current = cluster.add_instance( + "current", + main_configs=["configs/remote_servers.xml"], +) +# Here analyzer is disabled by default +backward = cluster.add_instance( + "backward", + use_old_analyzer=True, + main_configs=["configs/remote_servers.xml"], + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT, + with_installed_binary=True, +) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def 
test_two_new_versions(start_cluster): + # Two new versions (both know about the analyzer) + # One have it enabled by default, another one - disabled. + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert current.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) + + # Should be enabled everywhere + analyzer_enabled = current.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert backward.query(""" +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) + + # Should be disabled everywhere + analyzer_enabled = backward.query(f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""") + + assert TSV(analyzer_enabled) == TSV("0") From 7bd283764cc7c9350c376c3dc4f4b5c1bba0deee Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Jul 2024 22:59:22 +0000 Subject: [PATCH 247/273] Automatic style fix --- .../test_analyzer_compatibility/test.py | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git 
a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py index 0ba7f248606..d4ded420c61 100644 --- a/tests/integration/test_analyzer_compatibility/test.py +++ b/tests/integration/test_analyzer_compatibility/test.py @@ -22,6 +22,7 @@ backward = cluster.add_instance( with_installed_binary=True, ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -39,41 +40,61 @@ def test_two_new_versions(start_cluster): backward.query("SYSTEM FLUSH LOGS") query_id = str(uuid.uuid4()) - current.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", query_id=query_id) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert current.query(""" + assert ( + current.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "true"], ["current", "true"]]) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) # Should be enabled everywhere - analyzer_enabled = current.query(f""" + analyzer_enabled = current.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("1") query_id = str(uuid.uuid4()) - backward.query("SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", query_id=query_id) + backward.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) current.query("SYSTEM FLUSH LOGS") backward.query("SYSTEM FLUSH LOGS") - assert backward.query(""" + assert ( + backward.query( + """ SELECT hostname() AS h, getSetting('allow_experimental_analyzer') FROM 
clusterAllReplicas('test_cluster_mixed', system.one) -ORDER BY h;""") == TSV([["backward", "false"], ["current", "false"]]) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) # Should be disabled everywhere - analyzer_enabled = backward.query(f""" + analyzer_enabled = backward.query( + f""" SELECT DISTINCT Settings['allow_experimental_analyzer'] FROM clusterAllReplicas('test_cluster_mixed', system.query_log) -WHERE initial_query_id = '{query_id}';""") +WHERE initial_query_id = '{query_id}';""" + ) assert TSV(analyzer_enabled) == TSV("0") From d57375181d7de82f28ce5fcd40580cf04c2411b8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 15:49:52 +0000 Subject: [PATCH 248/273] Better --- src/Core/Settings.h | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 ++++++++++++++++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9da0b297e9e..f6d282792db 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. 
It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 34f5c28fef8..544b06cca1b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. # This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 7ecb2cda257..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) @@ -27,6 +27,9 @@ def make_instance(name, *args, **kwargs): ) +# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits +assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" + # _n1/_n2 contains cluster with 
different -- should fail # only n1 contains new_user n1 = make_instance( @@ -35,6 +38,14 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) +backward = make_instance( + "backward", + main_configs=["configs/remote_servers_backward.xml"], + image="clickhouse/clickhouse-server", + # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, + with_installed_binary=True, +) users = pytest.mark.parametrize( "user,password", @@ -416,6 +427,28 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) +@users +def test_user_secure_cluster_with_backward(user, password): + id_ = "with-backward-query-dist_secure-" + user + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password + ) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + +@users +def test_user_secure_cluster_from_backward(user, password): + id_ = "from-backward-query-dist_secure-" + user + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + assert n1.contains_in_log( + "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." 
+ ) + + def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From fcabefa8f3e5a86aad6f5c2b79ef8eabbc349b9d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 16:42:01 +0000 Subject: [PATCH 249/273] Automatically disabling --- src/Server/TCPHandler.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index cfb41be0c27..443cc99475f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1892,10 +1892,13 @@ void TCPHandler::receiveQuery() /// /// FIXME: Remove when allow_experimental_analyzer will become obsolete. - /// Even if allow_experimental_analyzer setting wasn't explicitly changed on the initiator server, it might be disabled there - /// So we just force ourselves to act in the same way. - if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - passed_settings.set("allow_experimental_analyzer", static_cast(passed_settings.allow_experimental_analyzer)); + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. 
+ if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; From fe6a875c7473d814011f4ae202942232e0801427 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Jul 2024 21:51:40 +0000 Subject: [PATCH 250/273] Make the setting back IMPORTANT + fix build --- src/Core/Settings.h | 2 +- src/Server/TCPHandler.cpp | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test.py | 35 +------------------ 4 files changed, 4 insertions(+), 37 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f6d282792db..9da0b297e9e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -399,7 +399,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. 
It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 443cc99475f..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1875,7 +1875,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. if (is_interserver_mode && !default_database.empty()) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 544b06cca1b..34f5c28fef8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo # Minimum version we use in integration tests to check compatibility with old releases # Keep in mind that we only support upgrading between releases that are at most 1 year different. 
# This means that this minimum need to be, at least, 1 year older than the current release -CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8" +CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3" # to create docker-compose env file diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert 
get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." - ) - - def test_secure_cluster_distributed_over_distributed_different_users(): # This works because we will have initial_user='default' n1.query( From c93d8cbb66ad7ae5a1e9c4f46f0c351944ff04e9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Jul 2024 13:57:47 +0200 Subject: [PATCH 251/273] Fixes --- contrib/jemalloc-cmake/CMakeLists.txt | 6 +++++- programs/keeper/Keeper.cpp | 5 +++++ programs/server/Server.cpp | 5 +++++ src/Common/Jemalloc.cpp | 26 +++++++++++++++++++++++++- src/Common/Jemalloc.h | 4 ++++ 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index cc5a391676f..38ebcc8f680 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,11 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
- set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + else() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + endif() else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index f14ef2e5552..fc3778593a6 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -277,6 +279,9 @@ HTTPContextPtr httpContext() int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4cb3b5f45c7..1277249b462 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -656,6 +657,10 @@ static void initializeAzureSDKLogger( int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, 
reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif From da5f3c1efd940488413b34d3e5f8855460f0ce80 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:01:57 +0000 Subject: [PATCH 252/273] Fix test --- .../test_functions.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..e5023c062ff 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: 
x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. "rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,16 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +218,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. + "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) From dbfe6323821739af4c8856e799cb5d861377439d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 4 Jul 2024 13:11:13 +0000 Subject: [PATCH 253/273] Correct the test to exclude farmHash for now --- tests/integration/test_backward_compatibility/test_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index e5023c062ff..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -166,6 +166,8 @@ def test_string_functions(start_cluster): "filesystemUnreserved", "filesystemCapacity", "filesystemAvailable", + # Exclude it for now. 
Looks like the result depends on the build type. + "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) From 32a61e6088a8d5b18d7f217c45c27f1268db2501 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Jul 2024 15:38:44 +0200 Subject: [PATCH 254/273] Bump From 1e5bc5bc8c7e65908e7127e06e1f5008f6fbfa20 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Jul 2024 15:48:12 +0200 Subject: [PATCH 255/273] fix flaky test --- .../0_stateless/03172_error_log_table_not_empty.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 8d74ebe1039..4b83400f5de 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -4,17 +4,19 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# system.error_log is created lazy, flush logs query makes it sure that the table is created. 
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;" + # Get the previous number of errors for 111, 222 and 333 errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") -# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed +# Throw three random errors: 111, 222 and 333 and call flush logs to ensure system.error_log is flushed $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } -SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " @@ -30,7 +32,6 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } -SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " @@ -38,4 +39,4 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; -" \ No newline at end of file +" From 11a30d6d6026dca61247590ce337e8e766601c5a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 14:14:19 +0000 Subject: [PATCH 256/273] Bump 
s2geometry to latest master --- contrib/s2geometry | 2 +- contrib/s2geometry-cmake/CMakeLists.txt | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/contrib/s2geometry b/contrib/s2geometry index 0547c383717..0146e2d1355 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b +Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 6632f9c27d5..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) +option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) if (NOT ENABLE_S2_GEOMETRY) - message(STATUS "Not using S2 geometry") + message(STATUS "Not using S2 Geometry") return() endif() @@ -38,6 +38,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2cell_index.cc" "${S2_SOURCE_DIR}/s2/s2cell_union.cc" "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" @@ -46,6 +47,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2coords.cc" "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2density_tree.cc" "${S2_SOURCE_DIR}/s2/s2earth.cc" "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" @@ -53,8 +55,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2fractal.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc" + "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" 
"${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -63,10 +67,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" - "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" "${S2_SOURCE_DIR}/s2/s2point_compression.cc" @@ -80,10 +84,11 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2predicates.cc" "${S2_SOURCE_DIR}/s2/s2projections.cc" "${S2_SOURCE_DIR}/s2/s2r2rect.cc" - "${S2_SOURCE_DIR}/s2/s2region.cc" - "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2random.cc" "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_sharder.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" "${S2_SOURCE_DIR}/s2/s2region_union.cc" "${S2_SOURCE_DIR}/s2/s2shape_index.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" @@ -94,9 +99,12 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2testing.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" @@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE absl::strings absl::type_traits absl::utility + absl::vlog_is_on ) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") From 
e7e2b0953c0df57c95f6b5e0f6cc1afe5914c4e9 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Thu, 4 Jul 2024 14:50:51 +0000 Subject: [PATCH 257/273] Prevent another possible buffer overflow --- src/Functions/bitShiftLeft.cpp | 4 ++-- src/Functions/bitShiftRight.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 8e39ed86461..d561430d51f 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -42,7 +42,7 @@ struct BitShiftLeftImpl { const UInt8 word_size = 8 * sizeof(*pos); size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) @@ -110,7 +110,7 @@ struct BitShiftLeftImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 46cfcde8a33..05b8581c792 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -58,7 +58,7 @@ struct BitShiftRightImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); 
else if (b == bit_limit) @@ -98,7 +98,7 @@ struct BitShiftRightImpl { const UInt8 word_size = 8; size_t n = end - pos; - const UInt256 bit_limit = word_size * n; + const UInt128 bit_limit = static_cast(word_size) * n; if (b < 0 || static_cast(b) > bit_limit) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if (b == bit_limit) From 93afc8e6133365007488c4d8340f434f6e8a876f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 15:11:29 +0000 Subject: [PATCH 258/273] more precise warning message about sanitizers --- programs/server/Server.cpp | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f992fdc13a9..d51d959a42a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -625,6 +625,28 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { @@ -711,7 +733,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. 
It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE From 24ff0f601d5b8d474429d67b5ed8702c662c58ec Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:15:32 +0200 Subject: [PATCH 259/273] update keeper bench example config file --- utils/keeper-bench/example.yaml | 67 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ connections: host: "localhost:9181" -generator: - setup: +setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" node: - name: "test3" + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 node: - name: "test_create" - node: - name: "test4" - node: - name: "test" - data: "somedata" - node: - repeat: 4 - name: - random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 + repeat: 2 node: repeat: 2 - node: - repeat: 2 - name: - random_string: - size: 12 name: random_string: - size: 15 - data: - random_string: - size: - min_value: 10 - max_value: 20 - node: - name: "test2" - data: "somedata" + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + +generator: requests: create: path: "/test_create" From 
2c9421812063cc22133f508e10033accb611d6d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 15:01:40 +0000 Subject: [PATCH 260/273] Random header fixes for libcxx 16 --- base/poco/Foundation/include/Poco/Logger.h | 2 ++ base/poco/Foundation/include/Poco/Message.h | 1 + src/Common/formatIPv6.h | 1 + src/Coordination/Changelog.h | 1 + src/Coordination/FourLetterCommand.h | 1 + src/Disks/ObjectStorages/MetadataOperationsHolder.h | 1 + src/IO/Archives/hasRegisteredArchiveFileExtension.cpp | 2 ++ src/Loggers/OwnSplitChannel.h | 1 + src/Storages/MergeTree/IPartMetadataManager.h | 1 + 9 files changed, 11 insertions(+) diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index 2a1cb33b407..74ddceea9dd 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include "Poco/Channel.h" diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index 9068e56a93c..756e427c5f5 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -19,6 +19,7 @@ #include +#include #include "Poco/Foundation.h" #include "Poco/Timestamp.h" diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bb83e0381ef..abeda95ed0d 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 2e8dbe75e90..c9b45d9a344 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 82b30a0b5f6..2a53bade62f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h 
@@ -2,6 +2,7 @@ #include "config.h" +#include #include #include #include diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 8997f40b9a2..a042f4bd8b9 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 2a979f500f7..407977f1f13 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -1,5 +1,7 @@ #include +#include +#include namespace DB { diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 7ca27cf6584..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index cef1d10e4ad..e817421f7d0 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From 14f54cb6e96066d90946a7e97ebd87b76160ab14 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 15:44:16 +0000 Subject: [PATCH 261/273] slightly better calculation of primary index --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 19 +++++- .../MergeTreeDataPartWriterOnDisk.cpp | 65 ++++++++++--------- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 +-- 3 files changed, 55 insertions(+), 38 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 6152da78395..c87f66b64f3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ 
b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + 
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. 
for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. + Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { From 6dd13dd34ab397d5e18d01820a064f18ed25595a Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 4 Jul 2024 17:59:07 +0200 Subject: [PATCH 262/273] fix clean-up process --- utils/keeper-bench/Runner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && 
!children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); } From c98b411edd34450c9954f8d086ae014fb80d1d8a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 16:11:12 +0000 Subject: [PATCH 263/273] fix tests --- .../0_stateless/02993_lazy_index_loading.reference | 2 +- .../03127_system_unload_primary_key_table.reference | 8 ++++---- .../0_stateless/03128_system_unload_primary_key.reference | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference 
+++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 From 78a2139f2a43752196a029995b6965ada359c954 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Jul 2024 19:27:10 +0200 Subject: [PATCH 264/273] restore timeouts, mark as no-fasttests --- .../queries/0_stateless/03172_error_log_table_not_empty.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 4b83400f5de..22a2fd82c64 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: this test relies on the timeouts, it always takes no less that 4 seconds to run CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,11 +14,12 @@ errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHER errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") -# Throw three random errors: 111, 222 and 333 and call flush logs to ensure system.error_log is flushed +# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format 
NULL; SYSTEM FLUSH LOGS; " @@ -32,6 +35,7 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; SYSTEM FLUSH LOGS; " From df0cce24ee4f7e74b53c667cfc8b43a7e3e142ea Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 4 Jul 2024 19:34:47 +0200 Subject: [PATCH 265/273] CI: Fix sync pr merge --- tests/ci/sync_pr.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index 8251ccbaf38..1b71231f820 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -101,23 +101,20 @@ def main(): assert pr_info.merged_pr, "BUG. merged PR number could not been determined" prs = gh.get_pulls_from_search( - query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr is:open", repo="ClickHouse/clickhouse-private", ) - sync_pr = None - if len(prs) > 1: print(f"WARNING: More than one PR found [{prs}] - exiting") elif len(prs) == 0: print("WARNING: No Sync PR found") else: sync_pr = prs[0] - - if args.merge: - merge_sync_pr(gh, sync_pr) - elif args.status: - set_sync_status(gh, pr_info, sync_pr) + if args.merge: + merge_sync_pr(gh, sync_pr) + elif args.status: + set_sync_status(gh, pr_info, sync_pr) if __name__ == "__main__": From 597810b69d7655b9049ded3b529d12a23996a770 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 18:46:09 +0000 Subject: [PATCH 266/273] Fix s390x build --- contrib/s2geometry-cmake/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/s2geometry-cmake/CMakeLists.txt 
b/contrib/s2geometry-cmake/CMakeLists.txt index 48562b8cead..5eabe71b538 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) -if (NOT ENABLE_S2_GEOMETRY) +# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged +if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X) message(STATUS "Not using S2 Geometry") return() endif() From ffe1f8fea019f08de4a9a32f99c1ebce4baeae71 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Jul 2024 15:40:19 +0000 Subject: [PATCH 267/273] Bump Azure to 1.12 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 6262a76ef4c..92c94d7f37a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd +Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf From e428542b2ea7340d6314d76c6043134c356677a0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 18 Feb 2024 12:44:52 +0100 Subject: [PATCH 268/273] Add prometheus protobufs. 
--- contrib/CMakeLists.txt | 2 + .../prometheus-protobufs-cmake/CMakeLists.txt | 34 +++ contrib/prometheus-protobufs-gogo/LICENSE | 35 +++ contrib/prometheus-protobufs-gogo/README | 4 + .../gogoproto/gogo.proto | 145 +++++++++++++ contrib/prometheus-protobufs/LICENSE | 201 ++++++++++++++++++ contrib/prometheus-protobufs/README | 2 + .../prometheus-protobufs/prompb/remote.proto | 88 ++++++++ .../prometheus-protobufs/prompb/types.proto | 187 ++++++++++++++++ src/CMakeLists.txt | 4 + src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 12 files changed, 706 insertions(+) create mode 100644 contrib/prometheus-protobufs-cmake/CMakeLists.txt create mode 100644 contrib/prometheus-protobufs-gogo/LICENSE create mode 100644 contrib/prometheus-protobufs-gogo/README create mode 100644 contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto create mode 100644 contrib/prometheus-protobufs/LICENSE create mode 100644 contrib/prometheus-protobufs/README create mode 100644 contrib/prometheus-protobufs/prompb/remote.proto create mode 100644 contrib/prometheus-protobufs/prompb/types.proto diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 08f58335d16..90ae5981a21 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c) add_contrib (libssh-cmake libssh) +add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." 
as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/prometheus-protobufs-cmake/CMakeLists.txt b/contrib/prometheus-protobufs-cmake/CMakeLists.txt new file mode 100644 index 00000000000..8c939902be7 --- /dev/null +++ b/contrib/prometheus-protobufs-cmake/CMakeLists.txt @@ -0,0 +1,34 @@ +option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF}) + +if(NOT ENABLE_PROMETHEUS_PROTOBUFS) + message(STATUS "Not using prometheus-protobufs") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") +set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs") +set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo") + +# Protobuf_IMPORT_DIRS specify where the protobuf compiler will look for .proto files. +set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) +list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}") + +PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers + "prompb/remote.proto" + "prompb/types.proto" + "gogoproto/gogo.proto" +) + +set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS}) + +# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") + +# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files. 
+set (CMAKE_CXX_CLANG_TIDY "") + +add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers}) +target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf) + +add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs) diff --git a/contrib/prometheus-protobufs-gogo/LICENSE b/contrib/prometheus-protobufs-gogo/LICENSE new file mode 100644 index 00000000000..16be18e5c50 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved. +Copyright (c) 2013, The GoGo Authors. All rights reserved. + +Protocol Buffers for Go with Gadgets + +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. +https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/prometheus-protobufs-gogo/README b/contrib/prometheus-protobufs-gogo/README new file mode 100644 index 00000000000..c40bc42df66 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/README @@ -0,0 +1,4 @@ +File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project: +https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto + +File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs. diff --git a/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto new file mode 100644 index 00000000000..974b36a7ccd --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto @@ -0,0 +1,145 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/cosmos/gogoproto +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/cosmos/gogoproto/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + 
optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + optional bool goproto_registration = 63032; + optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 63035; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; + + optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed = 64035; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string 
customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; + optional bool wktpointer = 65012; + + optional string castrepeated = 65013; +} diff --git a/contrib/prometheus-protobufs/LICENSE b/contrib/prometheus-protobufs/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/contrib/prometheus-protobufs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/prometheus-protobufs/README b/contrib/prometheus-protobufs/README new file mode 100644 index 00000000000..c557e59bb93 --- /dev/null +++ b/contrib/prometheus-protobufs/README @@ -0,0 +1,2 @@ +Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository: +https://github.com/prometheus/prometheus/tree/main/prompb diff --git a/contrib/prometheus-protobufs/prompb/remote.proto b/contrib/prometheus-protobufs/prompb/remote.proto new file mode 100644 index 00000000000..50bb25e7fac --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/remote.proto @@ -0,0 +1,88 @@ +// Copyright 2016 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "prompb/types.proto"; +import "gogoproto/gogo.proto"; + +message WriteRequest { + repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; + // Cortex uses this field to determine the source of the write request. + // We reserve it to avoid any compatibility issues. + reserved 2; + repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; +} + +// ReadRequest represents a remote read request. +message ReadRequest { + repeated Query queries = 1; + + enum ResponseType { + // Server will return a single ReadResponse message with matched series that includes list of raw samples. + // It's recommended to use streamed response types instead. + // + // Response headers: + // Content-Type: "application/x-protobuf" + // Content-Encoding: "snappy" + SAMPLES = 0; + // Server will stream a delimited ChunkedReadResponse message that + // contains XOR or HISTOGRAM(!) encoded chunks for a single series. + // Each message is following varint size and fixed size bigendian + // uint32 for CRC32 Castagnoli checksum. + // + // Response headers: + // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" + // Content-Encoding: "" + STREAMED_XOR_CHUNKS = 1; + } + + // accepted_response_types allows negotiating the content type of the response. + // + // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is + // implemented by server, error is returned. + // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used. + repeated ResponseType accepted_response_types = 2; +} + +// ReadResponse is a response when response_type equals SAMPLES. +message ReadResponse { + // In same order as the request's queries. 
+ repeated QueryResult results = 1; +} + +message Query { + int64 start_timestamp_ms = 1; + int64 end_timestamp_ms = 2; + repeated prometheus.LabelMatcher matchers = 3; + prometheus.ReadHints hints = 4; +} + +message QueryResult { + // Samples within a time series must be ordered by time. + repeated prometheus.TimeSeries timeseries = 1; +} + +// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS. +// We strictly stream full series after series, optionally split by time. This means that a single frame can contain +// partition of the single series, but once a new series is started to be streamed it means that no more chunks will +// be sent for previous one. Series are returned sorted in the same way TSDB block are internally. +message ChunkedReadResponse { + repeated prometheus.ChunkedSeries chunked_series = 1; + + // query_index represents an index of the query from ReadRequest.queries these chunks relates to. + int64 query_index = 2; +} diff --git a/contrib/prometheus-protobufs/prompb/types.proto b/contrib/prometheus-protobufs/prompb/types.proto new file mode 100644 index 00000000000..61fc1e0143e --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/types.proto @@ -0,0 +1,187 @@ +// Copyright 2017 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "gogoproto/gogo.proto"; + +message MetricMetadata { + enum MetricType { + UNKNOWN = 0; + COUNTER = 1; + GAUGE = 2; + HISTOGRAM = 3; + GAUGEHISTOGRAM = 4; + SUMMARY = 5; + INFO = 6; + STATESET = 7; + } + + // Represents the metric type, these match the set from Prometheus. + // Refer to github.com/prometheus/common/model/metadata.go for details. + MetricType type = 1; + string metric_family_name = 2; + string help = 4; + string unit = 5; +} + +message Sample { + double value = 1; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 2; +} + +message Exemplar { + // Optional, can be empty. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + double value = 2; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 3; +} + +// A native histogram, also known as a sparse histogram. +// Original design doc: +// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit +// The appendix of this design doc also explains the concept of float +// histograms. This Histogram message can represent both, the usual +// integer histogram as well as a float histogram. +message Histogram { + enum ResetHint { + UNKNOWN = 0; // Need to test for a counter reset explicitly. + YES = 1; // This is the 1st histogram after a counter reset. + NO = 2; // There was no counter reset between this and the previous Histogram. + GAUGE = 3; // This is a gauge histogram where counter resets don't happen. + } + + oneof count { // Count of observations in the histogram. + uint64 count_int = 1; + double count_float = 2; + } + double sum = 3; // Sum of observations in the histogram. + // The schema defines the bucket schema. Currently, valid numbers + // are -4 <= n <= 8. 
They are all for base-2 bucket schemas, where 1 + // is a bucket boundary in each case, and then each power of two is + // divided into 2^n logarithmic buckets. Or in other words, each + // bucket boundary is the previous boundary times 2^(2^-n). In the + // future, more bucket schemas may be added using numbers < -4 or > + // 8. + sint32 schema = 4; + double zero_threshold = 5; // Breadth of the zero bucket. + oneof zero_count { // Count in zero bucket. + uint64 zero_count_int = 6; + double zero_count_float = 7; + } + + // Negative Buckets. + repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false]; + // Use either "negative_deltas" or "negative_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double negative_counts = 10; // Absolute count of each bucket. + + // Positive Buckets. + repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false]; + // Use either "positive_deltas" or "positive_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double positive_counts = 13; // Absolute count of each bucket. + + ResetHint reset_hint = 14; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 15; +} + +// A BucketSpan defines a number of consecutive buckets with their +// offset. Logically, it would be more straightforward to include the +// bucket counts in the Span. However, the protobuf representation is +// more compact in the way the data is structured here (with all the +// buckets in a single array separate from the Spans). 
+message BucketSpan { + sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative). + uint32 length = 2; // Length of consecutive buckets. +} + +// TimeSeries represents samples and labels for a single time series. +message TimeSeries { + // For a timeseries to be valid, and for the samples and exemplars + // to be ingested by the remote system properly, the labels field is required. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + repeated Sample samples = 2 [(gogoproto.nullable) = false]; + repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false]; + repeated Histogram histograms = 4 [(gogoproto.nullable) = false]; +} + +message Label { + string name = 1; + string value = 2; +} + +message Labels { + repeated Label labels = 1 [(gogoproto.nullable) = false]; +} + +// Matcher specifies a rule, which can match or set of labels or not. +message LabelMatcher { + enum Type { + EQ = 0; + NEQ = 1; + RE = 2; + NRE = 3; + } + Type type = 1; + string name = 2; + string value = 3; +} + +message ReadHints { + int64 step_ms = 1; // Query step size in milliseconds. + string func = 2; // String representation of surrounding function or aggregation. + int64 start_ms = 3; // Start time in milliseconds. + int64 end_ms = 4; // End time in milliseconds. + repeated string grouping = 5; // List of label names used in aggregation. + bool by = 6; // Indicate whether it is without or by. + int64 range_ms = 7; // Range vector selector range in milliseconds. +} + +// Chunk represents a TSDB chunk. +// Time range [min, max] is inclusive. +message Chunk { + int64 min_time_ms = 1; + int64 max_time_ms = 2; + + // We require this to match chunkenc.Encoding. + enum Encoding { + UNKNOWN = 0; + XOR = 1; + HISTOGRAM = 2; + FLOAT_HISTOGRAM = 3; + } + Encoding type = 3; + bytes data = 4; +} + +// ChunkedSeries represents single, encoded time series. +message ChunkedSeries { + // Labels should be sorted. 
+ repeated Label labels = 1 [(gogoproto.nullable) = false]; + // Chunks will be in start time order and may overlap. + repeated Chunk chunks = 2 [(gogoproto.nullable) = false]; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b18207e55ad..d985595154c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() +if (TARGET ch_contrib::prometheus_protobufs) + dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs) +endif() + if (TARGET ch_rust::skim) dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index ad2ca2652d1..f68701d5d10 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -63,6 +63,7 @@ #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT +#cmakedefine01 USE_PROMETHEUS_PROTOBUFS /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/configure_config.cmake b/src/configure_config.cmake index a3f6dae4b87..75f61baa854 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -170,5 +170,8 @@ endif() if (TARGET ch_contrib::pocketfft) set(USE_POCKETFFT 1) endif() +if (TARGET ch_contrib::prometheus_protobufs) + set(USE_PROMETHEUS_PROTOBUFS 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) From 1ef5bca59164bd4ec00743e0f6a5d7cf17c077ef Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 18 Feb 2024 12:46:21 +0100 Subject: [PATCH 269/273] Fix cmake function PROTOBUF_GENERATE_CPP(): now it returns correct paths in SRCS and HDRS even if input ".proto" files are located in sibling directories. 
--- .../protobuf_generate.cmake | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/contrib/google-protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake index 3e30b4e40fd..0731a81aeb8 100644 --- a/contrib/google-protobuf-cmake/protobuf_generate.cmake +++ b/contrib/google-protobuf-cmake/protobuf_generate.cmake @@ -157,15 +157,13 @@ function(protobuf_generate) set(_generated_srcs_all) foreach(_proto ${protobuf_generate_PROTOS}) - get_filename_component(_abs_file ${_proto} ABSOLUTE) - get_filename_component(_abs_dir ${_abs_file} DIRECTORY) - get_filename_component(_basename ${_proto} NAME_WE) - file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) - - set(_possible_rel_dir) - if (NOT protobuf_generate_APPEND_PATH) - set(_possible_rel_dir ${_rel_dir}/) - endif() + # The protobuf compiler doesn't return paths to the files it generates so we have to calculate those paths here: + # _abs_file - absolute path to a .proto file, + # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS, + # _basename - filename of the .proto file (without path and without extension). 
+ get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path}) + get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path}) + get_filename_component(_basename "${_abs_file}" NAME_WE) set(_generated_srcs) foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) @@ -173,7 +171,7 @@ function(protobuf_generate) endforeach() if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc") set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") list(APPEND _generated_srcs ${_descriptor_file}) endif() @@ -196,3 +194,36 @@ function(protobuf_generate) target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) endif() endfunction() + +# Calculates the absolute path to a .proto file. +function(get_proto_absolute_path result proto) + cmake_path(IS_ABSOLUTE proto _is_abs_path) + if(_is_abs_path) + set(${result} "${proto}" PARENT_SCOPE) + return() + endif() + foreach(_include_dir ${ARGN}) + if(EXISTS "${_include_dir}/${proto}") + set(${result} "${_include_dir}/${proto}" PARENT_SCOPE) + return() + endif() + endforeach() + message(SEND_ERROR "Not found protobuf ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() + +# Calculates a relative path to a .proto file. The returned path is relative to one of include directories. 
+function(get_proto_relative_path result abs_path) + set(${result} "" PARENT_SCOPE) + get_filename_component(_abs_dir "${abs_path}" DIRECTORY) + foreach(_include_dir ${ARGN}) + cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix) + if(_is_prefix) + file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}") + if(NOT _rel_dir STREQUAL "") + set(${result} "${_rel_dir}/" PARENT_SCOPE) + endif() + return() + endif() + endforeach() + message(WARNING "Not found protobuf ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() From d777a7a9415c9f630e8df01cd42fc0d424cd4daf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Jul 2024 08:38:09 +0100 Subject: [PATCH 270/273] Reduce one more time --- src/Core/ServerSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6c62ab6def8..d473810bcb8 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0.0005, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp From 77e60543fd795737dfc41f8b90cc4be7e770dcb5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 5 Jul 2024 14:58:17 +0200 Subject: [PATCH 271/273] Revert "insertion deduplication on retries for materialised views" --- src/Columns/ColumnObject.cpp | 6 - src/Columns/ColumnObject.h | 2 +- src/Common/CollectionOfDerived.h | 184 ---- src/Core/Settings.h | 6 +- src/Interpreters/AsynchronousInsertQueue.cpp | 19 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 9 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/InterpreterInsertQuery.cpp | 682 ++++++------- src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/Squashing.cpp | 124 ++- src/Interpreters/Squashing.h | 50 +- src/Interpreters/SystemLog.cpp | 8 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 58 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 10 +- .../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 7 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 5 +- src/Processors/Sinks/SinkToStorage.h | 5 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 6 +- 
.../AggregatingInOrderTransform.cpp | 9 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/ApplySquashingTransform.h | 14 +- .../Transforms/CountingTransform.cpp | 3 +- .../DeduplicationTokenTransforms.cpp | 236 ----- .../Transforms/DeduplicationTokenTransforms.h | 237 ----- .../Transforms/ExpressionTransform.cpp | 2 - .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 6 +- .../Transforms/MaterializingTransform.cpp | 1 - .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/PlanSquashingTransform.cpp | 15 +- .../Transforms/PlanSquashingTransform.h | 3 +- .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingTransform.cpp | 18 +- .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 127 +-- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 - src/Server/TCPHandler.cpp | 20 +- src/Storages/Distributed/DistributedSink.cpp | 20 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 18 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 68 +- src/Storages/MergeTree/MergeTreeSink.h | 3 +- src/Storages/MergeTree/MutateTask.cpp | 19 +- 
.../MergeTree/ReplicatedMergeTreeSink.cpp | 94 +- .../MergeTree/ReplicatedMergeTreeSink.h | 13 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 8 +- .../StorageObjectStorageSink.cpp | 4 +- .../ObjectStorage/StorageObjectStorageSink.h | 2 +- .../StorageObjectStorageQueue.cpp | 8 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- .../RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +- .../RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/StorageBuffer.cpp | 10 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 8 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 59 +- src/Storages/WindowView/StorageWindowView.h | 2 +- .../test_force_deduplication/test.py | 85 +- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- ...lized_view_and_too_many_parts_zookeeper.sh | 4 +- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 - ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- 
...02912_ingestion_mv_deduplication.reference | 5 +- .../02912_ingestion_mv_deduplication.sql | 5 +- .../0_stateless/03008_deduplication.python | 657 ------------ ...08_deduplication_cases_from_docs.reference | 41 - .../03008_deduplication_cases_from_docs.sql | 331 ------ ...on_insert_into_partitioned_table.reference | 35 - ...lication_insert_into_partitioned_table.sql | 83 -- ...ert_several_blocks_nonreplicated.reference | 962 ------------------ ...ion_insert_several_blocks_nonreplicated.sh | 59 -- ...insert_several_blocks_replicated.reference | 962 ------------------ ...cation_insert_several_blocks_replicated.sh | 59 -- ...tes_several_blocks_nonreplicated.reference | 962 ------------------ ..._generates_several_blocks_nonreplicated.sh | 59 -- ...erates_several_blocks_replicated.reference | 962 ------------------ ..._mv_generates_several_blocks_replicated.sh | 59 -- ..._mv_into_one_table_nonreplicated.reference | 706 ------------- ...several_mv_into_one_table_nonreplicated.sh | 59 -- ...ral_mv_into_one_table_replicated.reference | 706 ------------- ...on_several_mv_into_one_table_replicated.sh | 59 -- .../03035_max_insert_threads_support.sh | 2 +- 138 files changed, 865 insertions(+), 8646 deletions(-) delete mode 100644 src/Common/CollectionOfDerived.h delete mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.cpp delete mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.h delete mode 100644 tests/queries/0_stateless/03008_deduplication.python delete mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference delete mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference delete 
mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh delete mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference delete mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..90ef974010c 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,10 +1093,4 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } -void ColumnObject::updateHashFast(SipHash & hash) const -{ - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); -} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..e2936b27994 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { 
throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; + void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h deleted file mode 100644 index 97c0c3fbc06..00000000000 --- a/src/Common/CollectionOfDerived.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* This is a collections of objects derived from ItemBase. -* Collection contains no more than one instance for each derived type. -* The derived type is used to access the instance. 
-*/ - -template -class CollectionOfDerivedItems -{ -public: - using Self = CollectionOfDerivedItems; - using ItemPtr = std::shared_ptr; - -private: - struct Rec - { - std::type_index type_idx; - ItemPtr ptr; - - bool operator<(const Rec & other) const - { - return type_idx < other.type_idx; - } - - bool operator<(const std::type_index & value) const - { - return type_idx < value; - } - - bool operator==(const Rec & other) const - { - return type_idx == other.type_idx; - } - }; - using Records = std::vector; - -public: - void swap(Self & other) noexcept - { - records.swap(other.records); - } - - void clear() - { - records.clear(); - } - - bool empty() const - { - return records.empty(); - } - - size_t size() const - { - return records.size(); - } - - Self clone() const - { - Self result; - result.records.reserve(records.size()); - for (const auto & rec : records) - result.records.emplace_back(rec.type_idx, rec.ptr->clone()); - return result; - } - - void append(Self && other) - { - auto middle_idx = records.size(); - std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); - chassert(isUniqTypes()); - } - - template - void add(std::shared_ptr info) - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - return addImpl(std::type_index(typeid(T)), std::move(info)); - } - - template - std::shared_ptr get() const - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - return cast; - } - - template - std::shared_ptr extract() - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = 
std::dynamic_pointer_cast(it->ptr); - chassert(cast); - - records.erase(it); - return cast; - } - - std::string debug() const - { - std::string result; - - for (auto & rec : records) - { - result.append(rec.type_idx.name()); - result.append(" "); - } - - return result; - } - -private: - bool isUniqTypes() const - { - auto uniq_it = std::adjacent_find(records.begin(), records.end()); - - return uniq_it == records.end(); - } - - void addImpl(std::type_index type_idx, ItemPtr item) - { - auto it = std::lower_bound(records.begin(), records.end(), type_idx); - - if (it == records.end()) - { - records.emplace_back(type_idx, item); - return; - } - - if (it->type_idx == type_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); - - - records.emplace(it, type_idx, item); - - chassert(isUniqTypes()); - } - - Records::const_iterator getImpl(std::type_index type_idx) const - { - auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); - - if (it == records.cend()) - return records.cend(); - - if (it->type_idx != type_idx) - return records.cend(); - - return it; - } - - Records records; -}; - -} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 81d0aa0c51d..5903dbd32eb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block 
size for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -634,8 +634,9 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -952,7 +953,6 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dd1166a9228..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter( - query, - query_context, - query_context->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_insert */ false); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = 
interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -787,12 +781,7 @@ try try { interpreter = std::make_unique( - key.query, - insert_context, - key.settings.insert_allow_materialized_columns, - false, - false, - true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } @@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 81bb6290acb..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -23,7 +22,6 @@ #include #include -#include #include #include #include @@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfoCloneable +class TableCheckTask : public ChunkInfo { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -112,12 +110,6 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } - TableCheckTask(const TableCheckTask & other) - : table(other.table) - , check_data_tasks(other.check_data_tasks) - , is_finished(other.is_finished.load()) - {} - std::optional checkNext() const 
{ if (isFinished()) @@ -129,8 +121,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr tmp = check_data_tasks; - auto result = table->checkDataNext(tmp); + IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; + auto result = table->checkDataNext(check_data_tasks_); is_finished = !result.has_value(); return result; } @@ -188,7 +180,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.getChunkInfos().add(std::move(current_check_task)); + result.setChunkInfo(std::move(current_check_task)); return result; } @@ -288,7 +280,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = chunk.getChunkInfos().get(); + auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ee191c02ff8..0ee2bb6c0e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery( - insert, - getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false).execute(); + return InterpreterInsertQuery(insert, getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 26b7e074fdf..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,13 +534,7 @@ 
QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert( - ast.getExplainedQuery(), - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +38,6 @@ #include #include #include -#include "base/defines.h" namespace ProfileEvents @@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) -{ - chassert(presink_streams > 0); - chassert(sink_streams > 0); - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - - std::vector sink_chains; - std::vector presink_chains; - - for (size_t i = 0; i < sink_streams; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - - for (size_t i = 0; i < presink_streams; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - 
presink_chains.emplace_back(std::move(out)); - } - - return {std::move(presink_chains), std::move(sink_chains)}; -} - - -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - ContextPtr select_context = getContext(); - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. 
- */ - - Settings new_settings = select_context->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto context_for_trivial_select = Context::createCopy(context); - context_for_trivial_select->setSettings(new_settings); - context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - select_context = context_for_trivial_select; - } - - QueryPipelineBuilder pipeline; - - { - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
- if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - { - query_sample_block.setColumn( - col_idx, - ColumnWithTypeAndName( - makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), - makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), - query_columns[col_idx].name)); - } - } - } - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. 
- pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - size_t num_select_threads = pipeline.getNumThreads(); - - pipeline.resize(1); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. 
- - size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - - if (!settings.parallel_view_processing) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; - } - - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - pipeline.resize(presink_chains.size()); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - for (auto & chain : presink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(presink_chains)); - - pipeline.resize(sink_streams_size); - - for (auto & chain : sink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) - { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); - } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
- pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); -} - - -QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - Chain chain; - - { - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - /* presink_streams */1, /* sink_streams */1, - table, metadata_snapshot, query_sample_block); - - chain = std::move(presink_chains.front()); - chain.appendChain(std::move(sink_chains.front())); - } - - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - - chain.addSource(std::make_shared(chain.getInputHeader())); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - QueryPipeline pipeline = QueryPipeline(std::move(chain)); - - pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); - pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - pipeline.complete(std::move(pipe)); - } - - return pipeline; -} - - BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + QueryPipelineBuilder pipeline; + std::optional distributed_pipeline; + QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table 
functions we check access while executing @@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (!allow_materialized) + if (query.select && settings.parallel_distributed_insert_select) + // Distributed INSERT SELECT + distributed_pipeline = table->distributedWrite(query, getContext()); + + std::vector presink_chains; + std::vector sink_chains; + if (!distributed_pipeline) { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + /// * If it's not an INSERT SELECT, forget all that and use one stream. 
+ size_t pre_streams_size = 1; + size_t sink_streams_size = 1; + + if (query.select) + { + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. 
+ */ + + Settings new_settings = getContext()->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + else + { + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
+ auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + if (settings.max_insert_threads > 1) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); + pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + + if (table->supportsParallelInsert()) + sink_streams_size = pre_streams_size; + } + + pipeline.resize(pre_streams_size); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
+ if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + } + } + } + } + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + for (size_t i = 0; i < sink_streams_size; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); + } + for (size_t i = 0; i < pre_streams_size; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } } BlockIO res; - if (query.select) + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
+ if (distributed_pipeline) { - if (settings.parallel_distributed_insert_select) + res.pipeline = std::move(*distributed_pipeline); + } + else if (query.select) + { + const auto & header = presink_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - auto distributed = table->distributedWrite(query, getContext()); - if (distributed) - { - res.pipeline = std::move(*distributed); - } - else - { - res.pipeline = buildInsertSelectPipeline(query, table); - } - } - else + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - res.pipeline = buildInsertSelectPipeline(query, table); + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); } + + size_t num_select_threads = pipeline.getNumThreads(); + + for (auto & chain : presink_chains) + resources = chain.detachResources(); + for (auto & chain : sink_chains) + resources = chain.detachResources(); + + pipeline.addChains(std::move(presink_chains)); + pipeline.resize(sink_chains.size()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - res.pipeline = buildInsertPipeline(query, table); + auto & chain = presink_chains.at(0); + chain.appendChain(std::move(sink_chains.at(0))); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + res.pipeline = QueryPipeline(std::move(presink_chains[0])); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } } - res.pipeline.addStorageHolder(table); + res.pipeline.addResources(std::move(resources)); - if (const auto * mv = dynamic_cast(table.get())) - res.pipeline.addStorageHolder(mv->getTargetTable()); + res.pipeline.addStorageHolder(table); + if (inner_table) + res.pipeline.addStorageHolder(inner_table); return res; } @@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } - void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } - void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, - /* no_destination */false, - /* async_insert */false); + return std::make_unique(args.query, 
args.context, args.allow_materialized); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } - - } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 894c7c42144..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_, - bool no_squash_, - bool no_destination, - bool async_insert_); + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,17 +73,12 @@ private: ASTPtr query_ptr; const bool allow_materialized; - bool no_squash = false; - bool no_destination = false; + const bool no_squash; + const bool no_destination; const bool async_insert; std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - - QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); - QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); - Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -12,33 +11,24 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) 
, min_block_size_bytes(min_block_size_bytes_) - , header(header_) { } Chunk Squashing::flush() { - if (!accumulated) - return {}; - - auto result = convertToChunk(accumulated.extract()); - chassert(result); - return result; + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk.hasChunkInfo()) return Chunk(); - auto squash_info = input_chunk.getChunkInfos().extract(); - - if (!squash_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk)) + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) { /// If no accumulated data, return just read block. - if (!accumulated) + if (chunks_to_merge_vec.empty()) { - accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. 
- if (isEnoughSize()) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - accumulated.add(std::move(input_chunk)); + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize()) - return convertToChunk(accumulated.extract()); - + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } return {}; } @@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); - aggr_chunk.getChunkInfos().add(std::move(info)); - chassert(aggr_chunk); - return aggr_chunk; + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); } -Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) +Chunk Squashing::squash(std::vector & input_chunks) { + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; 
+ mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} - Chunk result; - result.setColumns(std::move(mutable_columns), rows); - result.setChunkInfos(infos); - result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); - chassert(result); - return result; + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -bool Squashing::isEnoughSize() const -{ - return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); -}; - -bool Squashing::isEnoughSize(const Chunk & chunk) const -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - -void Squashing::CurrentSize::add(Chunk && chunk) -{ - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - chunks.push_back(std::move(chunk)); -} - -std::vector Squashing::CurrentSize::extract() -{ - auto result = std::move(chunks); - *this = {}; - return result; -} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,18 +8,9 @@ namespace DB { -class ChunksToSquash : public 
ChunkInfoCloneable +struct ChunksToSquash : public ChunkInfo { -public: - ChunksToSquash() = default; - ChunksToSquash(const ChunksToSquash & other) - { - chunks.reserve(other.chunks.size()); - for (const auto & chunk: other.chunks) - chunks.push_back(chunk.clone()); - } - - std::vector chunks = {}; + mutable std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -45,35 +36,32 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - void setHeader(Block header_) { header = std::move(header_); } - const Block & getHeader() const { return header; } - -private: - class CurrentSize + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize { - std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - - public: - explicit operator bool () const { return !chunks.empty(); } - size_t getRows() const { return rows; } - size_t getBytes() const { return bytes; } - void add(Chunk && chunk); - std::vector extract(); }; - const size_t min_block_size_rows; - const size_t min_block_size_bytes; - Block header; + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; - CurrentSize accumulated; + CurrentSize accumulated_size; - static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - bool isEnoughSize() const; + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index f386e157b14..557065b23ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -538,13 +538,7 @@ 
void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter( - query_ptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ce6f5e454e..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolumns in unknown required columns. + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4466be5b3a7..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + Chunk Chunk::clone() const { - auto tmp 
= Chunk(getColumns(), getNumRows()); - tmp.setChunkInfos(chunk_infos.clone()); - return tmp; + return Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1348966c0d3..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,9 +1,7 @@ #pragma once -#include #include - -#include +#include namespace DB { @@ -11,29 +9,11 @@ namespace DB class ChunkInfo { public: - using Ptr = std::shared_ptr; - - ChunkInfo() = default; - ChunkInfo(const ChunkInfo&) = default; - ChunkInfo(ChunkInfo&&) = default; - - virtual Ptr clone() const = 0; virtual ~ChunkInfo() = default; + ChunkInfo() = default; }; - -template -class ChunkInfoCloneable : public ChunkInfo -{ -public: - ChunkInfoCloneable() = default; - ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; - - Ptr clone() const override - { - return std::static_pointer_cast(std::make_shared(*static_cast(this))); - } -}; +using ChunkInfoPtr = std::shared_ptr; /** * Chunk is a list of columns with the same length. 
@@ -52,26 +32,26 @@ public: class Chunk { public: - using ChunkInfoCollection = CollectionOfDerivedItems; - Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_infos(std::move(other.chunk_infos)) + , chunk_info(std::move(other.chunk_info)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_infos = std::move(other.chunk_infos); + chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -82,15 +62,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); + chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_infos.clear(); + chunk_info.reset(); } const Columns & getColumns() const { return columns; } @@ -101,9 +81,9 @@ public: /** Get empty columns with the same types as in block. 
*/ MutableColumns cloneEmptyColumns() const; - ChunkInfoCollection & getChunkInfos() { return chunk_infos; } - const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } - void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } + const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } + bool hasChunkInfo() const { return chunk_info != nullptr; } + void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -127,7 +107,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoCollection chunk_infos; + ChunkInfoPtr chunk_info; void checkNumRowsIsConsistent(); }; @@ -137,15 +117,11 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfoCloneable +class AsyncInsertInfo : public ChunkInfo { public: AsyncInsertInfo() = default; - AsyncInsertInfo(const AsyncInsertInfo & other) = default; - AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) - : offsets(offsets_) - , tokens(tokens_) - {} + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} std::vector offsets; std::vector tokens; @@ -154,11 +130,9 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfoCloneable +class ChunkMissingValues : public ChunkInfo { public: - ChunkMissingValues(const ChunkMissingValues & other) = default; - using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d9fab88fe1f..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 25c15d40c9a..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + 
block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9e499e2c400..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count); - piece.back().setChunkInfos(concatenated.getChunkInfos()); - + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 46be6e74693..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,9 +8,8 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}) - , input(inputs.front()) - , output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - if (input.chunk.getChunkInfos().empty()) + const auto & info = input.chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk 
info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - if (auto arenas_info = input.chunk.getChunkInfos().get()) + /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. + if (const auto * arenas_info = typeid_cast(info.get())) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().getChunkInfos().add(std::make_shared()); + chunks.back().setChunkInfo(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. 
diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index e4f22deec8d..bcf4e759024 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,22 +6,18 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfoCloneable +class MergeTreePartLevelInfo : public ChunkInfo { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) - : origin_merge_tree_part_level(part_level) - { } - MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; - + explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto part_level_info = chunk.getChunkInfos().get(); - if (part_level_info) + const auto & info = chunk.getChunkInfo(); + if (const auto * part_level_info = typeid_cast(info.get())) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index cd347d371d9..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h 
b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2f23f2a5c4d..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace Poco { @@ -15,13 +14,11 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfoCloneable +struct ChunkSelectFinalIndices : public ChunkInfo { - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); - ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; - const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index b1b0182a113..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. 
- if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) + if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) + if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index c05cc1defcb..30cf958c072 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk); - cur_chunk = std::move(chunk); + consume(chunk.clone()); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult 
SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,7 +18,8 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk & chunk) = 0; + virtual void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } private: std::vector table_locks; @@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk &) override {} + void consume(Chunk) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index 7ac460c14e2..ec0dc9609f1 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,10 +43,7 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - auto chunk = Chunk(res.getColumns(), res.rows()); - chunk.getChunkInfos().add(std::move(info)); - - return chunk; + return Chunk(res.getColumns(), res.rows(), std::move(info)); } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 1578bd389c9..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 9abe0504d10..00f40a34361 
100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,9 +5,7 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) -{ -} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..9ffe15d0f85 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. 
- to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); + to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - { finalizeChunk(chunk, aggregates_mask); - } - else if (!chunk.getChunkInfos().get()) + else if (!chunk.getChunkInfo()) { - chunk.getChunkInfos().add(std::make_shared()); + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..5d50e97f552 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { @@ -13,12 +12,10 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable +struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} - Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 517f035667f..65f0612d738 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -44,11 +44,15 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & 
chunk) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info.get(); + return agg_info; } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -206,7 +210,11 @@ private: void process(Chunk && chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); + + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..e167acde067 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfoCloneable +class AggregatedChunkInfo : public ChunkInfo { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 49a6581e685..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,12 +27,18 @@ public: } ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } protected: void onConsume(Chunk chunk) override { - cur_chunk = 
Squashing::squash(std::move(chunk)); + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -42,10 +48,16 @@ protected: res.is_done = true; return res; } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } private: Squashing squashing; Chunk cur_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 2c6b3bd8638..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,7 +1,6 @@ -#include -#include #include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp deleted file mode 100644 index 6786f76cbef..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include - -#include - -#include -#include -#include - - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void RestoreChunkInfosTransform::transform(Chunk & chunk) -{ - chunk.getChunkInfos().append(chunk_infos.clone()); -} - -namespace DeduplicationToken -{ - -String TokenInfo::getToken() const -{ - if (!isDefined()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); - - return getTokenImpl(); -} - -String TokenInfo::getTokenImpl() const -{ - String result; - result.reserve(getTotalSize()); - - for (const auto & part : parts) - { - if (!result.empty()) - result.append(":"); - result.append(part); - } - - return result; -} - -String TokenInfo::debugToken() const -{ - return getTokenImpl(); -} - -void TokenInfo::addChunkHash(String part) -{ - if (stage == UNDEFINED && 
empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(std::move(part)); -} - -void TokenInfo::finishChunkHashes() -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - stage = DEFINED; -} - -void TokenInfo::setUserToken(const String & token) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_USER_TOKEN; - - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("user-token-{}", token)); -} - -void TokenInfo::setSourceWithUserToken(size_t block_number) -{ - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("source-number-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::setViewID(const String & id) -{ - if (stage == DEFINED) - stage = DEFINE_VIEW; - - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-id-{}", id)); -} - -void TokenInfo::setViewBlockNumber(size_t block_number) -{ - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-block-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::reset() -{ - stage = UNDEFINED; - parts.clear(); -} - -void TokenInfo::addTokenPart(String part) -{ - parts.push_back(std::move(part)); -} - -size_t TokenInfo::getTotalSize() const -{ - if (parts.empty()) - return 0; - - size_t size = 0; - for 
(const auto & part : parts) - size += part.size(); - - // we reserve more size here to be able to add delimenter between parts. - return size + parts.size() - 1; -} - -#ifdef ABORT_ON_LOGICAL_ERROR -void CheckTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); -} -#endif - -String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) -{ - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); -} - - -void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); - - if (token_info->isDefined()) - return; - - token_info->addChunkHash(getChunkHash(chunk)); - token_info->finishChunkHashes(); -} - -void SetUserTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); - token_info->setUserToken(user_token); -} - -void SetSourceBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceWithUserToken(block_number++); -} - -void SetViewIDTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - 
ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewIDTransform"); - token_info->setViewID(view_id); -} - -void SetViewBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); -} - -void ResetTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - - token_info->reset(); -} - -} -} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h deleted file mode 100644 index d6aff9e1370..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -#include -#include "Common/Logger.h" - - -namespace DB -{ - class RestoreChunkInfosTransform : public ISimpleTransform - { - public: - RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , chunk_infos(std::move(chunk_infos_)) - {} - - String getName() const override { return "RestoreChunkInfosTransform"; } - - void transform(Chunk & chunk) override; - - private: - Chunk::ChunkInfoCollection chunk_infos; - }; - - -namespace DeduplicationToken -{ - class TokenInfo : public ChunkInfoCloneable - { - public: - TokenInfo() = default; - TokenInfo(const TokenInfo & other) = default; - - String getToken() const; - String debugToken() const; - - bool empty() const { return parts.empty(); } - - bool isDefined() const { return stage == DEFINED; } - - void addChunkHash(String part); - void finishChunkHashes(); - - void setUserToken(const String & token); - void 
setSourceWithUserToken(size_t block_number); - - void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); - - void reset(); - - private: - String getTokenImpl() const; - - void addTokenPart(String part); - size_t getTotalSize() const; - - /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the following order. - * Firstly token is expanded with information about the source. - * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken - * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken - * - * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. 
- * - * transition // method - * DEFINED -> DEFINE_VIEW // setViewID - * DEFINE_VIEW -> DEFINED // defineViewID - */ - - enum BuildingStage - { - UNDEFINED, - DEFINE_SOURCE_WITH_HASHES, - DEFINE_SOURCE_USER_TOKEN, - DEFINE_VIEW, - DEFINED, - }; - - BuildingStage stage = UNDEFINED; - std::vector parts; - }; - - -#ifdef ABORT_ON_LOGICAL_ERROR - /// use that class only with debug builds in CI for introspection - class CheckTokenTransform : public ISimpleTransform - { - public: - CheckTokenTransform(String debug_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(std::move(debug_)) - { - } - - String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String debug; - LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - }; -#endif - - - class AddTokenInfoTransform : public ISimpleTransform - { - public: - explicit AddTokenInfoTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared()); - } - }; - - - class DefineSourceWithChunkHashTransform : public ISimpleTransform - { - public: - explicit DefineSourceWithChunkHashTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. 
- // But if there is some table with different engine, we still need to define the source of the data in deduplication token - // We use that transform to define the source as a hash of entire block in deduplication token - void transform(Chunk & chunk) override; - - static String getChunkHash(const Chunk & chunk); - }; - - class ResetTokenTransform : public ISimpleTransform - { - public: - explicit ResetTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } - - void transform(Chunk & chunk) override; - }; - - - class SetUserTokenTransform : public ISimpleTransform - { - public: - SetUserTokenTransform(String user_token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , user_token(std::move(user_token_)) - { - } - - String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String user_token; - }; - - - class SetSourceBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetSourceBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - - - class SetViewIDTransform : public ISimpleTransform - { - public: - SetViewIDTransform(String view_id_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , view_id(std::move(view_id_)) - { - } - - String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } - - void transform(Chunk & chunk) override; - - private: - String view_id; - }; - - - class SetViewBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetViewBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, 
header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - -} -} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 04fabc9a3c6..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,7 +1,5 @@ #include #include - - namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - task = data.chunk.getChunkInfos().get(); - if (!task) + if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.getChunkInfos().add(std::make_shared()); + chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.getChunkInfos().add(std::move(task)); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5f6d9d6fff2..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ 
-1,7 +1,6 @@ #pragma once #include -#include -#include + namespace DB { @@ -112,12 +111,11 @@ private: }; -class DelayedBlocksTask : public ChunkInfoCloneable +class DelayedBlocksTask : public ChunkInfo { public: DelayedBlocksTask() = default; - DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 9ae80e21a68..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,7 +1,6 @@ #include #include - namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d7bc320173b..607087fb39c 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,7 +150,11 @@ private: if (!chunk.hasRows()) return; - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index ea9ebb0f96e..fc40c6894bb 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ 
b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_shared(std::move(chunks)); + info->chunks = std::make_unique(std::move(chunks)); Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); output.push(std::move(chunk)); } @@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - - auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); - chunk.getChunkInfos().add(std::move(chunk_info)); - - chunks_map[bucket].emplace_back(std::move(chunk)); + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } @@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); 
+ const auto * chunks_to_merge = typeid_cast(info.get()); + if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - if (cur_chunk.getChunkInfos().empty()) + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (auto agg_info = cur_chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (cur_chunk.getChunkInfos().get()) + else if (typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.getChunkInfos().add(std::move(res_info)); + chunk.setChunkInfo(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); + + const auto * agg_info = typeid_cast(info.get()); if 
(!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 3a3c1bd9c1e..77ee3034ffc 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -143,9 +142,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfoCloneable +struct ChunksToMerge : public ChunkInfo { - std::shared_ptr chunks; + std::unique_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..ad723da7527 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. 
@@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index ee4dfa6a64e..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header_, header_) - , squashing(header_, min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk) + if (!squashed_chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return bool(squashed_chunk); + return squashed_chunk.hasChunkInfo(); } Chunk PlanSquashingTransform::getRemaining() { 
- return squashing.flush(); + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index e6db245499e..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,6 +23,7 @@ protected: private: Squashing squashing; Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index b44f5a3203e..480ab1a0f61 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - auto select_final_indices_info = chunk.getChunkInfos().extract(); + const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,6 +41,7 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } + chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1fb4433240a..34b733cde5e 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,7 +18,9 @@ SquashingTransform::SquashingTransform( void 
SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +33,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = Squashing::squash(squashing.flush()); + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -44,7 +49,6 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); - if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -63,14 +67,18 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(planned_chunk)); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = Squashing::squash(squashing.flush()); + chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); } } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 59fceccb538..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. 
if (overflow_row) { - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +24,9 @@ #include #include #include -#include "base/defines.h" -#include #include #include -#include namespace ProfileEvents @@ -111,7 +105,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -122,7 +116,6 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; - bool disable_deduplication_for_children; struct State { @@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: 
StorageLiveView & live_view; @@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageWindowView & window_view; @@ -223,10 +216,45 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + * + * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables + * are involved. 
+ * + * Example: + * + * landing -┬--> mv_1_1 --┬-> ds_1_1 + * | | + * └--> mv_1_2 --┘ + * + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (view_id.hasUUID()) + insert_deduplication_token += "_" + toString(view_id.uuid); + else + insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -333,13 +361,7 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter( - nullptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -356,10 +378,6 @@ std::optional generateViewChain( table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); } -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", out.getInputHeader())); -#endif - auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -402,19 +420,11 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); -#endif - auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); + storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); -#endif } return out; @@ -455,7 +465,11 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; + /// If the "root" table deduplicates blocks, there are no need to make deduplication for children + /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks + bool disable_deduplication_for_children = false; + if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -546,25 
+560,12 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - else if (dynamic_cast(storage.get())) - { - auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); - metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); - sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -572,15 +573,8 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - - result_chain.addSource(std::make_shared(sink->getHeader())); - result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(storage_header)); - } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -596,7 +590,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & 
context = view.context; @@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - - if (!disable_deduplication_for_children) - { - String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - else - { - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_, - bool disable_deduplication_for_children_) + std::shared_ptr views_data_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) - , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + state.emplace(process(block, view, *views_data)); } @@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk & chunk) +void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, 
getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk & chunk) +void PushingToWindowViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index a9e5b1535c0..f0b2ead687e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index bb2be2c8ffb..2cd4dc42a83 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & 
QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde return *this; } -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept -{ - append(std::move(rhs)); - return *this; -} - QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index 10f7f39ab09..ed9eb68b7ba 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,11 +20,8 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; - /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; - QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fccea9e258e..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); - if (result_chunk) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = Squashing::squash(squashing.flush()); - if (!result_chunk) - { - return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); - } + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 8791668cd89..e556bda2561 100644 --- 
a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk & chunk) +void DistributedSink::consume(Chunk chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk & chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp( - copy_query_ast, - job.local_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp( - query_ast, - context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 5b7396f2c6f..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; 
private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f9bd8b6ff9..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter( - insert, - new_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); - + InterpreterInsertQuery interpreter(insert, new_context, false, true, true); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - kafka_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 9803fa0a160..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); 
block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 82759e8a851..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include @@ -331,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch builder = interpreter.buildQueryPipeline(); } - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(chunk_infos.clone(), cur_header); - }); - - String live_view_id = live_view.getStorageID().hasUUID() ? 
toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(live_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 12d8e898347..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + void writeBlock(const Block & block, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index ff5214a5e51..bc8cb0ce69a 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,13 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); - return ChunkAndProgress{ - .chunk = std::move(chunk), + .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? 
std::make_shared(task->getInfo().data_part->info.level) : nullptr), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 4f90f7131da..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,10 +264,7 @@ try ++it; } - auto result = Chunk(std::move(res_columns), rows_read); - if (add_part_level) - result.getChunkInfos().add(std::make_shared(data_part->info.level)); - return result; + return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d8cfce1ca99..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,27 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include +#include +#include +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { @@ -71,12 +58,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk & chunk) +void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk) size_t streams = 0; bool support_parallel_write = false; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw 
Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - String block_dedup_token; - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } - if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + } + size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk) /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); - if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = block_dedup_token, + .block_dedup_token = std::move(block_dedup_token), .part_counters = std::move(part_counters), }); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - const Settings & settings = context->getSettingsRef(); - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - - if (settings.insert_deduplicate && deduplication_log) + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 90976020d52..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,6 +36,7 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk 
and start writing a new one. diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3dbcb5e5bda..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); - if (squashed_chunk) + auto planned_chunk = 
projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bbae054fbed..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,25 +1,21 @@ +#include +#include +#include +#include +#include #include "Common/Exception.h" #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include - +#include +#include +#include +#include #include -#include - namespace ProfileEvents { @@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if constexpr (async_insert) { - const auto async_insert_info_ptr = chunk.getChunkInfos().get(); - if (async_insert_info_ptr) + const auto & chunk_info = chunk.getChunkInfo(); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) 
async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - String block_dedup_token; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + if (deduplicate) { + String block_dedup_token; + /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. 
+ + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } } profile_events_scope.reset(); @@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + /// If deduplicated data should not be inserted into MV, we need to set proper + /// value for `last_block_is_duplicate`, which is possible only after the part is committed. + /// Othervide we can delay commit. + /// TODO: we can also delay commit if there is no MVs. 
+ if (!settings.deduplicate_blocks_in_dependent_materialized_views) + finishDelayedChunk(zookeeper); + ++num_blocks_processed; } @@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; + last_block_is_duplicate = false; + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; + last_block_is_duplicate = last_block_is_duplicate || deduplicated; + /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir] () + auto try_rollback_part_rename = [this, &part, &original_part_dir]() { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - const auto & settings = context->getSettingsRef(); - - ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( - settings.insert_keeper_fault_injection_probability, - settings.insert_keeper_fault_injection_seed, - storage.getZooKeeper(), - "ReplicatedMergeTreeSink::onFinish", - log); - - finishDelayedChunk(zookeeper); + auto zookeeper = storage.getZooKeeper(); + finishDelayedChunk(std::make_shared(zookeeper)); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..39623c20584 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,6 +59,16 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + /// For proper deduplication in MaterializedViews + bool lastBlockIsDuplicate() const override + { + /// If MV is responsible for deduplication, block is not considered duplicating. + if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; + } + struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -116,6 +126,7 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; + bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk & chunk) +void MessageQueueSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index 4a9248c6c4d..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk & chunk) 
override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,13 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - nats_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d2bdd0af302..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk & chunk) +void StorageObjectStorageSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6ab531bb21a..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return 
"StorageObjectStorageSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter( - insert, - queue_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index ee2570756ed..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk & chunk) +void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(partition_index_to_chunk[partition_index]); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index fcd67556dc9..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { 
return "PartitionedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 44479bd01e2..ba3cc6f58d0 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ true, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context, true); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..e4b19992151 
100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - rabbitmq_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 4b5188ca9f2..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,10 +247,7 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. 
if (!chunks.empty()) - { - Chunk empty; - consume(empty); - } + consume({}); } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 64190c8c86f..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1f7f6939f40..c451cfd1bf5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk & chunk) +void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 2e1e0c7b429..011322df829 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3473166a080..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,8 +313,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - auto 
chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b064fba223a..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,13 +1020,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 67586985ce8..849fa5dbe0b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,13 +1050,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter( - new_query, - query_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(new_query, query_context); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3fb397c7b81..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1778,12 +1778,12 @@ public: String 
getName() const override { return "StorageFileSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index c80e799a92b..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,10 +1248,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); - } + sink->consume(Chunk{block.getColumns(), block.rows()}); sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 463694c63aa..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,6 +21,7 @@ #include #include +#include "StorageLogSettings.h" #include #include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk & chunk) +void LogSink::consume(Chunk chunk) { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = 
getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b1bd7053c2e..f69c4adb552 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index e0818fafae9..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -106,12 +107,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 2a8a7bd2ee7..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 
cdfeab62b58..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 1a275320f43..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk & chunk) +void RedisSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,8 +567,7 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - Chunk chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = 
getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0d094c15880..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk & chunk) +void SetOrJoinSink::consume(Chunk chunk) { - Block block = getHeader().cloneWithColumns(chunk.getColumns()); + Block block = getHeader().cloneWithColumns(chunk.detachColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 9b6d9f041e1..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -226,9 +226,9 @@ public: } } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 90e05c44e31..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk & chunk) +void StorageURLSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..fa7cc6eeeef 100644 --- 
a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c9c606de049..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e36247103c7..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,13 +689,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery 
interpreter( - insert, - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, getContext()); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1420,7 +1413,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) + StorageWindowView & window_view, const Block & block, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1435,7 +1428,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1480,10 +1473,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header_) + pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header_, std::make_shared(filter_expression), + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1538,30 +1531,6 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); - - builder.addSimpleTransform([&](const Block & stream_header) - { - // Can't move chunk_infos here, that function could be called several times - return std::make_shared(chunk_infos.clone(), stream_header); - }); - - String 
window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(window_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); - }); -#endif - builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1601,13 +1570,6 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); - }); -#endif - auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1624,16 +1586,9 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); } -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Before out", stream_header); - }); -#endif - builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h 
b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..f79867df424 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 14c11bc8500..87b2c45bbc5 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,8 +29,6 @@ def get_counts(): def test_basic(start_cluster): - old_src, old_a, old_b, old_c = 0, 0, 0, 0 - node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -41,15 +39,6 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) - - src, a, b, c = get_counts() - assert src == old_src + 1 - assert a == old_a + 2 - assert b == old_b + 2 - assert c == old_c + 2 - old_src, old_a, old_b, old_c = src, a, b, c - - # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -57,51 +46,34 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication only for src table + old_src, old_a, old_b, old_c = get_counts() + # number of rows in test_mv_a and test_mv_c depends on order of inserts into views + assert old_src == 11 + assert old_a in (1, 11) + assert old_b == 1 + assert old_c in (1, 11) + 
node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for all the tables - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() + # no changes because of deduplication in source table assert src == old_src assert a == old_a assert b == old_b assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c - # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == 11 + assert a == old_a + 10 # first insert could be succesfull with disabled dedup + assert b == 11 + assert c == old_c + 10 + with pytest.raises(QueryRuntimeException): node.query( """ @@ -110,23 +82,16 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication for all tables, except test_mv_b. 
For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() - assert src == old_src - assert a == old_a - assert b == old_b + 10 - assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c + assert src == 21 + assert a == old_a + 20 + assert b == 21 + assert c == old_c + 20 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 9c9281dc7e4..adf6abb7298 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -3 +2 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 51e6a513608..d3c4da86b41 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default +-- Implicit insert isn't deduplicated SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 8f7d19028b0..1fb219108da 100755 --- 
a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index dadf2f35e6e..a9801e3b910 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql 
b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index 6287156daaf..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,7 +16,6 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS -SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index 2d9f236ada9..e0cc8f0ce63 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data -18 18 18 18 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, 
insert_deduplication_token = no, results inconsitent +18 9 9 9 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 465c8d6136c..fdd75b91b1f 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ 
select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index ba50902ebea..d2d19b76a1f 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 07deb7c2565..335b55f05c8 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,14 +10,13 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 -2023-09-01 12:00:00 42 +2022-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 +2022-09-01 12:00:00 42 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing 
(Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index a2378fd8f67..f206f0d7775 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,9 +54,8 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded - Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -98,7 +97,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables + - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python deleted file mode 100644 index dd1058518c9..00000000000 --- a/tests/queries/0_stateless/03008_deduplication.python +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import argparse -import string - - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - - -def __format(template, **params): 
- field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] - kv_args = {} - for field in field_names: - if field in params: - kv_args[field] = params[field] - else: - kv_args[field] = "" - - return template.format(**kv_args) - - -def instance_create_statement( - table_name, - table_columns, - table_keys, - table_engine, - with_deduplication, - no_merges=True, -): - template = """ - CREATE TABLE {table_name} - {table_columns} - ENGINE = {table_engine} - ORDER BY {table_keys} - {table_settings}; - {table_no_merges} - """ - - params = dict() - params["table_name"] = table_name - params["table_columns"] = table_columns - params["table_keys"] = table_keys - params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = ( - "MergeTree()" - if table_engine == "MergeTree" - else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" - ) - - deduplication_window_setting_name = ( - "non_replicated_deduplication_window" - if table_engine == "MergeTree" - else "replicated_deduplication_window" - ) - deduplication_window_setting_value = 1000 if with_deduplication else 0 - - settings = list() - settings += [ - f"{deduplication_window_setting_name}={deduplication_window_setting_value}" - ] - params["table_settings"] = "SETTINGS " + ",".join(settings) - - return __format(template, **params) - - -def instance_insert_statement( - table_name, count, insert_method, insert_unique_blocks, use_insert_token -): - insert_settings = ( - "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" - ) - - if insert_method == "InsertSelect": - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", - insert_settings=insert_settings, - ) - - else: - 
template = """ - INSERT INTO {table_name} - {insert_settings} VALUES {insert_values}; - """ - - values = [] - for i in range(count): - values += ( - [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] - ) - insert_values = ", ".join(values) - - return __format( - template, - table_name=table_name, - insert_settings=insert_settings, - insert_values=insert_values, - ) - - -def get_drop_tables_statements(tables): - return "".join( - [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] - ) - - -def get_logs_statement(args): - if args.get_logs: - return "SET send_logs_level='test';" - return "" - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -class ArgsFactory: - def __init__(self, parser): - self.__parser = parser - - def add_opt_engine(self): - self.__parser.add_argument( - "--table-engine", - choices=["ReplicatedMergeTree", "MergeTree"], - default="MergeTree", - ) - - def add_opt_user_token(self): - self.__parser.add_argument( - "--use-insert-token", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_single_thread(self): - self.__parser.add_argument( - "--single-thread", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_dedup_src(self): - self.__parser.add_argument( - "--deduplicate-src-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_dedup_dst(self): - self.__parser.add_argument( - "--deduplicate-dst-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_get_logs(self): - self.__parser.add_argument( - "--get-logs", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_uniq_blocks(self): - self.__parser.add_argument( - "--insert-unique-blocks", type=str2bool, nargs="?", const=True, 
default=True - ) - - def add_opt_insert_method(self): - self.__parser.add_argument( - "--insert-method", - choices=["InsertSelect", "InsertValues"], - default="InsertSelect", - ) - - def add_all(self): - self.add_opt_engine() - self.add_opt_user_token() - self.add_opt_single_thread() - self.add_opt_dedup_src() - self.add_opt_dedup_dst() - self.add_opt_get_logs() - self.add_opt_insert_method() - self.add_opt_uniq_blocks() - - -def test_insert_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_statement = instance_create_statement( - table_name="table_when_b_even", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - create_mv_statement = """ - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - """ - - drop_tables_statements = get_drop_tables_statements( - ["table_a_b", "table_when_b_even", "mv_b_even"] - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 10, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - print_details_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - - SELECT 'table_when_b_even'; - SELECT 'count', count() FROM table_when_b_even; - {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 5 ) - FROM 
table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = """ - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 10 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_a_b_statement} - - {create_table_when_b_even_statement} - - {create_mv_statement} - - -- first insert - {insert_statement} - - {print_details_statements} - - {assert_first_insert_statements} - - -- second insert, it is retry - {insert_statement} - - {print_details_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def 
test_mv_generates_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = [ - "table_for_join_with", - "table_a_b", - "table_when_b_even_and_joined", - "mv_b_even", - ] - drop_tables_statements = get_drop_tables_statements(tables) - - details_print_for_table_for_join_with = "" - if args.get_logs: - details_print_for_table_for_join_with = """ - SELECT 'table_for_join_with'; - SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - """ - - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a_src String, b UInt64)", - table_keys="(a_src, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_and_joined_statement = instance_create_statement( - table_name="table_when_b_even_and_joined", - table_columns="(a_src String, a_join String, b UInt64)", - table_keys="(a_src, a_join, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 5, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - - SELECT 'table_when_b_even_and_joined'; - SELECT 'count', count() FROM table_when_b_even_and_joined; - {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 5 ) - FROM table_a_b; - - SELECT throwIf( count() != 9 ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) - FROM table_when_b_even_and_joined; - """ - else: - if 
args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - CREATE TABLE table_for_join_with - (a_join String, b UInt64) - ENGINE = MergeTree() - ORDER BY (a_join, b); - INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(1); - {details_print_for_table_for_join_with} - - {create_table_a_b_statement} - SYSTEM STOP MERGES table_a_b; - - {create_table_when_b_even_and_joined_statement} - SYSTEM STOP MERGES table_when_b_even_and_joined; - - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even_and_joined - AS - SELECT a_src, a_join, table_for_join_with.b as b - FROM table_a_b - FULL OUTER JOIN 
table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -- first insert - {insert_statement} - - {details_print_statements} - - -- first assertion - {assert_first_insert_statements} - - -- second insert - {insert_statement} - - {details_print_statements} - - -- second assertion - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_several_mv_into_one_table(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] - drop_tables_statements = get_drop_tables_statements(tables) - - create_table_src_statement = instance_create_statement( - table_name="table_src", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_dst_statement = instance_create_statement( - table_name="table_dst", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_src", - 8, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_src count', count() FROM table_src; - - SELECT 'table_dst count', count() FROM table_dst; - {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 8 ) - FROM table_src; - - SELECT throwIf( count() != 6 ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) - FROM table_dst; - """ - else: - if 
args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_src_statement} - - {create_table_dst_statement} - - CREATE MATERIALIZED VIEW mv_b_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 2 = 0; - - CREATE MATERIALIZED VIEW mv_b_even_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 4 = 0; - - -- first insert - {insert_statement} - - {details_print_statements} - - {assert_first_insert_statements} - - -- second insert, retry - {insert_statement} - - {details_print_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def parse_args(): - parser = argparse.ArgumentParser() - 
subparsers = parser.add_subparsers(dest="test") - test_insert_several_blocks( - subparsers.add_parser("insert_several_blocks_into_table") - ) - test_mv_generates_several_blocks( - subparsers.add_parser("mv_generates_several_blocks") - ) - test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) - args = parser.parse_args() - if args.test is None: - parser.print_help() - return args - - -def main(): - args = parse_args() - if args.test is not None: - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference deleted file mode 100644 index 4893274c1cd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference +++ /dev/null @@ -1,41 +0,0 @@ -Different materialized view insert into one underlayed table equal data. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -second attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Different insert operations generate the same data after transformation in underlied table of materialized view. 
-first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -second attempt -from dst 1 A all_1_1_0 -from dst 2 A all_2_2_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Indentical blocks in insertion with `insert_deduplication_token` -first attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -second attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -third attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -Indentical blocks in insertion -from dst 0 A all_1_1_0 -Indentical blocks after materialised view`s transformation -first attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 -second attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql deleted file mode 100644 index 7927a6b1edf..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql +++ /dev/null @@ -1,331 +0,0 @@ --- ######### -select 'Different materialized view insert into one underlayed table equal data.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; -DROP TABLE IF EXISTS mv_first; -DROP TABLE IF EXISTS mv_second; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE TABLE mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_first -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -CREATE MATERIALIZED VIEW mv_second -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by 
all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_second; -DROP TABLE mv_first; -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (2, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion with `insert_deduplication_token`'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -select 'first attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'second 
attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'third attempt'; - -INSERT INTO dst SELECT - 1 AS key, - 'b' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks after materialised view`s transformation'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part 
-FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference deleted file mode 100644 index c82a6eaa213..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ /dev/null @@ -1,35 +0,0 @@ -no user deduplication token -partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: -1 A -1 D -2 B -2 C -mv_table is not deduplicated because the inserted blocks was different: -1 A -1 A -1 D -2 B -2 B -2 C -with user deduplication token -partitioned_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -mv_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -with incorrect ussage of user deduplication token -partitioned_table is deduplicated because equal tokens: -1 A -2 B -mv_table is deduplicated because equal tokens: -1 A -2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql deleted file mode 100644 index 2eb931f7f73..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ /dev/null @@ -1,83 +0,0 @@ -DROP TABLE IF EXISTS partitioned_table; -DROP TABLE IF EXISTS mv_table; - - -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is not deduplicated because different tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because different tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with incorrect ussage of user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated because equal tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is deduplicated because equal tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference deleted file mode 100644 index bf900aa84d2..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even 
-count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 
22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test 
case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - 
-Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh deleted file mode 100755 index 49eb52b47fd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference deleted file mode 100644 index c815324b455..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference +++ /dev/null @@ 
-1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 
-table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even 
-count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True 
-table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 
-table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git 
a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh deleted file mode 100755 index 53af06d4a6f..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - 
--insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference deleted file mode 100644 index 6e76ec46aa8..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: 
insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined 
-count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 
20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh deleted file mode 100755 index 7d4f5240cd1..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference deleted file mode 100644 index a25e8713c61..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ 
- -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh deleted file mode 100755 index 109d1674f3a..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference deleted file mode 100644 index b6a3e0175a7..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference +++ 
/dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False 
-table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src 
count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 
50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree 
use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git 
a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh deleted file mode 100755 index fe3d610a758..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - 
--insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference deleted file mode 100644 index 1921103f49e..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test 
case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 
-OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst 
count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src 
count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst 
count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 
-table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True 
-table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh deleted file mode 100755 index 9adee6d53d4..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index cedb651a430..1e6bfb414d8 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ 
DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o StorageFileSink | wc -l +" | grep -o MaterializingTransform | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 7180ae03467b05fb0495d744c066d4df758c37a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 5 Jul 2024 13:18:48 +0000 Subject: [PATCH 272/273] Add `use_same_s3_credentials_for_base_backup` to docs --- docs/en/operations/backup.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2ba50b39934..7c102c38fa6 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`. - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. 
For example, `STANDARD` From d3f23c2753ff2b1ac935268c1af0609616381782 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 5 Jul 2024 13:29:34 +0000 Subject: [PATCH 273/273] Bump s2geometry again --- contrib/s2geometry | 2 +- contrib/s2geometry-cmake/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/contrib/s2geometry b/contrib/s2geometry index 0146e2d1355..6522a40338d 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57 +Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 5eabe71b538..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,6 @@ option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) -# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged -if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X) +if (NOT ENABLE_S2_GEOMETRY) message(STATUS "Not using S2 Geometry") return() endif()